LLVM 22.0.0git
AggressiveInstCombine.cpp
1//===- AggressiveInstCombine.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the aggressive expression pattern combiner classes.
10// Currently, it handles expression patterns for:
11// * Truncate instruction
12//
13//===----------------------------------------------------------------------===//
14
17#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/Dominators.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/Instruction.h"
32#include "llvm/IR/MDBuilder.h"
40
41using namespace llvm;
42using namespace PatternMatch;
43
44#define DEBUG_TYPE "aggressive-instcombine"
45
46namespace llvm {
47 extern cl::opt<bool> ProfcheckDisableMetadataFixes;
48 }
49
50STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
51STATISTIC(NumGuardedRotates,
52 "Number of guarded rotates transformed into funnel shifts");
53STATISTIC(NumGuardedFunnelShifts,
54 "Number of guarded funnel shifts transformed into funnel shifts");
55STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
56
57 static cl::opt<unsigned> MaxInstrsToScan(
58 "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
59 cl::desc("Max number of instructions to scan for aggressive instcombine."));
60
61 static cl::opt<unsigned> StrNCmpInlineThreshold(
62 "strncmp-inline-threshold", cl::init(3), cl::Hidden,
63 cl::desc("The maximum length of a constant string for a builtin string cmp "
64 "call eligible for inlining. The default value is 3."));
65
66 static cl::opt<unsigned>
67 MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden,
68 cl::desc("The maximum length of a constant string to "
69 "inline a memchr call."));
70
71/// Match a pattern for a bitwise funnel/rotate operation that partially guards
72/// against undefined behavior by branching around the funnel-shift/rotation
73/// when the shift amount is 0.
74 static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
75 if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
76 return false;
77
78 // As with the one-use checks below, this is not strictly necessary, but we
79 // are being cautious to avoid potential perf regressions on targets that
80 // do not actually have a funnel/rotate instruction (where the funnel shift
81 // would be expanded back into math/shift/logic ops).
82 if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
83 return false;
84
85 // Match V to funnel shift left/right and capture the source operands and
86 // shift amount.
87 auto matchFunnelShift = [](Value *V, Value *&ShVal0, Value *&ShVal1,
88 Value *&ShAmt) {
89 unsigned Width = V->getType()->getScalarSizeInBits();
90
91 // fshl(ShVal0, ShVal1, ShAmt)
92 // == (ShVal0 << ShAmt) | (ShVal1 >> (Width - ShAmt))
93 if (match(V, m_OneUse(m_c_Or(
94 m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
95 m_LShr(m_Value(ShVal1), m_Sub(m_SpecificInt(Width),
96 m_Deferred(ShAmt))))))) {
97 return Intrinsic::fshl;
98 }
99
100 // fshr(ShVal0, ShVal1, ShAmt)
101 // == (ShVal0 << (Width - ShAmt)) | (ShVal1 >> ShAmt)
102 if (match(V,
103 m_OneUse(m_c_Or(m_Shl(m_Value(ShVal0), m_Sub(m_SpecificInt(Width),
104 m_Value(ShAmt))),
105 m_LShr(m_Value(ShVal1), m_Deferred(ShAmt)))))) {
106 return Intrinsic::fshr;
107 }
108
109 return Intrinsic::not_intrinsic;
110 };
111
112 // One phi operand must be a funnel/rotate operation, and the other phi
113 // operand must be the source value of that funnel/rotate operation:
114 // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
115 // phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ]
116 // phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ]
117 PHINode &Phi = cast<PHINode>(I);
118 unsigned FunnelOp = 0, GuardOp = 1;
119 Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
120 Value *ShVal0, *ShVal1, *ShAmt;
121 Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
122 if (IID == Intrinsic::not_intrinsic ||
123 (IID == Intrinsic::fshl && ShVal0 != P1) ||
124 (IID == Intrinsic::fshr && ShVal1 != P1)) {
125 IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
126 if (IID == Intrinsic::not_intrinsic ||
127 (IID == Intrinsic::fshl && ShVal0 != P0) ||
128 (IID == Intrinsic::fshr && ShVal1 != P0))
129 return false;
130 assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
131 "Pattern must match funnel shift left or right");
132 std::swap(FunnelOp, GuardOp);
133 }
134
135 // The incoming block with our source operand must be the "guard" block.
136 // That must contain a cmp+branch to avoid the funnel/rotate when the shift
137 // amount is equal to 0. The other incoming block is the block with the
138 // funnel/rotate.
139 BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
140 BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
141 Instruction *TermI = GuardBB->getTerminator();
142
143 // Ensure that the shift values dominate each block.
144 if (!DT.dominates(ShVal0, TermI) || !DT.dominates(ShVal1, TermI))
145 return false;
146
147 BasicBlock *PhiBB = Phi.getParent();
148 if (!match(TermI, m_Br(m_SpecificICmp(CmpInst::ICMP_EQ, m_Specific(ShAmt),
149 m_ZeroInt()),
150 m_SpecificBB(PhiBB), m_SpecificBB(FunnelBB))))
151 return false;
152
153 IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
154
155 if (ShVal0 == ShVal1)
156 ++NumGuardedRotates;
157 else
158 ++NumGuardedFunnelShifts;
159
160 // If this is not a rotate then the select was blocking poison from the
161 // 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it.
162 bool IsFshl = IID == Intrinsic::fshl;
163 if (ShVal0 != ShVal1) {
164 if (IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal1))
165 ShVal1 = Builder.CreateFreeze(ShVal1);
166 else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal0))
167 ShVal0 = Builder.CreateFreeze(ShVal0);
168 }
169
170 // We matched a variation of this IR pattern:
171 // GuardBB:
172 // %cmp = icmp eq i32 %ShAmt, 0
173 // br i1 %cmp, label %PhiBB, label %FunnelBB
174 // FunnelBB:
175 // %sub = sub i32 32, %ShAmt
176 // %shr = lshr i32 %ShVal1, %sub
177 // %shl = shl i32 %ShVal0, %ShAmt
178 // %fsh = or i32 %shr, %shl
179 // br label %PhiBB
180 // PhiBB:
181 // %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
182 // -->
183 // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
184 Phi.replaceAllUsesWith(
185 Builder.CreateIntrinsic(IID, Phi.getType(), {ShVal0, ShVal1, ShAmt}));
186 return true;
187}
188
189/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
190/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
191/// of 'and' ops, then we also need to capture the fact that we saw an
192/// "and X, 1", so that's an extra return value for that case.
193namespace {
194struct MaskOps {
195 Value *Root = nullptr;
196 APInt Mask;
197 bool MatchAndChain;
198 bool FoundAnd1 = false;
199
200 MaskOps(unsigned BitWidth, bool MatchAnds)
201 : Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds) {}
202};
203} // namespace
204
205/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
206/// chain of 'and' or 'or' instructions looking for shift ops of a common source
207/// value. Examples:
208/// or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)
209/// returns { X, 0x129 }
210/// and (and (X >> 1), 1), (X >> 4)
211/// returns { X, 0x12 }
212static bool matchAndOrChain(Value *V, MaskOps &MOps) {
213 Value *Op0, *Op1;
214 if (MOps.MatchAndChain) {
215 // Recurse through a chain of 'and' operands. This requires an extra check
216 // vs. the 'or' matcher: we must find an "and X, 1" instruction somewhere
217 // in the chain to know that all of the high bits are cleared.
218 if (match(V, m_And(m_Value(Op0), m_One()))) {
219 MOps.FoundAnd1 = true;
220 return matchAndOrChain(Op0, MOps);
221 }
222 if (match(V, m_And(m_Value(Op0), m_Value(Op1))))
223 return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
224 } else {
225 // Recurse through a chain of 'or' operands.
226 if (match(V, m_Or(m_Value(Op0), m_Value(Op1))))
227 return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
228 }
229
230 // We need a shift-right or a bare value representing a compare of bit 0 of
231 // the original source operand.
232 Value *Candidate;
233 const APInt *BitIndex = nullptr;
234 if (!match(V, m_LShr(m_Value(Candidate), m_APInt(BitIndex))))
235 Candidate = V;
236
237 // Initialize result source operand.
238 if (!MOps.Root)
239 MOps.Root = Candidate;
240
241 // The shift constant is out-of-range? This code hasn't been simplified.
242 if (BitIndex && BitIndex->uge(MOps.Mask.getBitWidth()))
243 return false;
244
245 // Fill in the mask bit derived from the shift constant.
246 MOps.Mask.setBit(BitIndex ? BitIndex->getZExtValue() : 0);
247 return MOps.Root == Candidate;
248}
249
250/// Match patterns that correspond to "any-bits-set" and "all-bits-set".
251/// These will include a chain of 'or' or 'and'-shifted bits from a
252/// common source value:
253/// and (or (lshr X, C), ...), 1 --> (X & CMask) != 0
254/// and (and (lshr X, C), ...), 1 --> (X & CMask) == CMask
255/// Note: "any-bits-clear" and "all-bits-clear" are variations of these patterns
256/// that differ only with a final 'not' of the result. We expect that final
257/// 'not' to be folded with the compare that we create here (invert predicate).
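// Illustrative IR, hand-written for this note rather than taken from the LLVM
// test suite: an 'or' chain that tests bits 0, 3 and 5 of %x becomes a single
// masked compare (the "any-bits-set" form).
//
//   %s3 = lshr i32 %x, 3
//   %s5 = lshr i32 %x, 5
//   %o0 = or i32 %x, %s3
//   %o1 = or i32 %o0, %s5
//   %r  = and i32 %o1, 1
//   -->
//   %m  = and i32 %x, 41        ; 0b101001, i.e. bits 0, 3 and 5
//   %c  = icmp ne i32 %m, 0
//   %r  = zext i1 %c to i32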
258 static bool foldAnyOrAllBitsSet(Instruction &I) {
259 // The 'any-bits-set' ('or' chain) pattern is simpler to match because the
260 // final "and X, 1" instruction must be the final op in the sequence.
261 bool MatchAllBitsSet;
262 if (match(&I, m_c_And(m_OneUse(m_And(m_Value(), m_Value())), m_One())))
263 MatchAllBitsSet = true;
264 else if (match(&I, m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One())))
265 MatchAllBitsSet = false;
266 else
267 return false;
268
269 MaskOps MOps(I.getType()->getScalarSizeInBits(), MatchAllBitsSet);
270 if (MatchAllBitsSet) {
271 if (!matchAndOrChain(cast<BinaryOperator>(&I), MOps) || !MOps.FoundAnd1)
272 return false;
273 } else {
274 if (!matchAndOrChain(cast<BinaryOperator>(&I)->getOperand(0), MOps))
275 return false;
276 }
277
278 // The pattern was found. Create a masked compare that replaces all of the
279 // shift and logic ops.
280 IRBuilder<> Builder(&I);
281 Constant *Mask = ConstantInt::get(I.getType(), MOps.Mask);
282 Value *And = Builder.CreateAnd(MOps.Root, Mask);
283 Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask)
284 : Builder.CreateIsNotNull(And);
285 Value *Zext = Builder.CreateZExt(Cmp, I.getType());
286 I.replaceAllUsesWith(Zext);
287 ++NumAnyOrAllBitsSet;
288 return true;
289}
290
291 // Try to recognize the function below as a popcount intrinsic.
292// This is the "best" algorithm from
293// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
294// Also used in TargetLowering::expandCTPOP().
295//
296// int popcount(unsigned int i) {
297// i = i - ((i >> 1) & 0x55555555);
298// i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
299// i = ((i + (i >> 4)) & 0x0F0F0F0F);
300// return (i * 0x01010101) >> 24;
301// }
302 static bool tryToRecognizePopCount(Instruction &I) {
303 if (I.getOpcode() != Instruction::LShr)
304 return false;
305
306 Type *Ty = I.getType();
307 if (!Ty->isIntOrIntVectorTy())
308 return false;
309
310 unsigned Len = Ty->getScalarSizeInBits();
311 // FIXME: fix Len == 8 and other irregular type lengths.
312 if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
313 return false;
314
315 APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
316 APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
317 APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
318 APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01));
319 APInt MaskShift = APInt(Len, Len - 8);
320
321 Value *Op0 = I.getOperand(0);
322 Value *Op1 = I.getOperand(1);
323 Value *MulOp0;
324 // Matching "(i * 0x01010101...) >> 24".
325 if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) &&
326 match(Op1, m_SpecificInt(MaskShift))) {
327 Value *ShiftOp0;
328 // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)".
329 if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
330 m_Deferred(ShiftOp0)),
331 m_SpecificInt(Mask0F)))) {
332 Value *AndOp0;
333 // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)".
334 if (match(ShiftOp0,
335 m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)),
336 m_And(m_LShr(m_Specific(AndOp0), m_SpecificInt(2)),
337 m_SpecificInt(Mask33))))) {
338 Value *Root, *SubOp1;
339 // Matching "i - ((i >> 1) & 0x55555555...)".
340 const APInt *AndMask;
341 if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
342 match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
343 m_APInt(AndMask)))) {
344 auto CheckAndMask = [&]() {
345 if (*AndMask == Mask55)
346 return true;
347
348 // Exact match failed, see if any bits are known to be 0 where we
349 // expect a 1 in the mask.
350 if (!AndMask->isSubsetOf(Mask55))
351 return false;
352
353 APInt NeededMask = Mask55 & ~*AndMask;
354 return MaskedValueIsZero(cast<Instruction>(SubOp1)->getOperand(0),
355 NeededMask,
356 SimplifyQuery(I.getDataLayout()));
357 };
358
359 if (CheckAndMask()) {
360 LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
361 IRBuilder<> Builder(&I);
362 I.replaceAllUsesWith(
363 Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
364 ++NumPopCountRecognized;
365 return true;
366 }
367 }
368 }
369 }
370 }
371
372 return false;
373}
374
375 /// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), provided C1 and
376 /// C2 saturate the value of the fp conversion. The transform is not reversible,
377 /// as the fptosi.sat is more defined than the input - all values produce a
378 /// valid value for the fptosi.sat, whereas inputs that were out of range of
379 /// the integer conversion produce poison in the original. The reversed pattern may
380/// use fmax and fmin instead. As we cannot directly reverse the transform, and
381/// it is not always profitable, we make it conditional on the cost being
382/// reported as lower by TTI.
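// Illustrative IR, hand-written rather than taken from the LLVM test suite:
// clamping an fptosi result to the i8 range becomes a saturating conversion
// plus a sign extend (assuming TTI reports the intrinsic as cheaper).
//
//   %conv = fptosi float %x to i32
//   %lo   = call i32 @llvm.smax.i32(i32 %conv, i32 -128)
//   %r    = call i32 @llvm.smin.i32(i32 %lo, i32 127)
//   -->
//   %sat  = call i8 @llvm.fptosi.sat.i8.f32(float %x)
//   %r    = sext i8 %sat to i32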
383 static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
384 // Look for smin(smax(fptosi(x), C1), C2), converting to fptosi_sat.
385 Value *In;
386 const APInt *MinC, *MaxC;
387 if (!match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))),
388 m_APInt(MinC))),
389 m_APInt(MaxC))) &&
390 !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))),
391 m_APInt(MaxC))),
392 m_APInt(MinC))))
393 return false;
394
395 // Check that the constants clamp a saturate.
396 if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
397 return false;
398
399 Type *IntTy = I.getType();
400 Type *FpTy = In->getType();
401 Type *SatTy =
402 IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1);
403 if (auto *VecTy = dyn_cast<VectorType>(IntTy))
404 SatTy = VectorType::get(SatTy, VecTy->getElementCount());
405
406 // Get the cost of the intrinsic, and check that against the cost of
407 // fptosi+smin+smax
408 InstructionCost SatCost = TTI.getIntrinsicInstrCost(
409 IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
410 TTI::TCK_RecipThroughput);
411 SatCost += TTI.getCastInstrCost(Instruction::SExt, IntTy, SatTy,
412 TTI::CastContextHint::None,
413 TTI::TCK_RecipThroughput);
414
415 InstructionCost MinMaxCost = TTI.getCastInstrCost(
416 Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None,
417 TTI::TCK_RecipThroughput);
418 MinMaxCost += TTI.getIntrinsicInstrCost(
419 IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}),
420 TTI::TCK_RecipThroughput);
421 MinMaxCost += TTI.getIntrinsicInstrCost(
422 IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}),
423 TTI::TCK_RecipThroughput);
424
425 if (SatCost >= MinMaxCost)
426 return false;
427
428 IRBuilder<> Builder(&I);
429 Value *Sat =
430 Builder.CreateIntrinsic(Intrinsic::fptosi_sat, {SatTy, FpTy}, In);
431 I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy));
432 return true;
433}
434
435/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
436/// pessimistic codegen that has to account for setting errno and can enable
437/// vectorization.
438 static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
439 TargetLibraryInfo &TLI, AssumptionCache &AC,
440 DominatorTree &DT) {
441 // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
442 // (because NNAN or the operand arg must not be less than -0.0) and (3) we
443 // would not end up lowering to a libcall anyway (which could change the value
444 // of errno), then:
445 // (1) errno won't be set.
446 // (2) it is safe to convert this to an intrinsic call.
447 Type *Ty = Call->getType();
448 Value *Arg = Call->getArgOperand(0);
449 if (TTI.haveFastSqrt(Ty) &&
450 (Call->hasNoNaNs() ||
451 cannotBeOrderedLessThanZero(
452 Arg, SimplifyQuery(Call->getDataLayout(), &TLI, &DT, &AC, Call)))) {
453 IRBuilder<> Builder(Call);
454 Value *NewSqrt =
455 Builder.CreateIntrinsic(Intrinsic::sqrt, Ty, Arg, Call, "sqrt");
456 Call->replaceAllUsesWith(NewSqrt);
457
458 // Explicitly erase the old call because a call with side effects is not
459 // trivially dead.
460 Call->eraseFromParent();
461 return true;
462 }
463
464 return false;
465}
466
467// Check if this array of constants represents a cttz table.
468// Iterate over the elements from \p Table by trying to find/match all
469// the numbers from 0 to \p InputBits that should represent cttz results.
470static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift,
471 const APInt &AndMask, Type *AccessTy,
472 unsigned InputBits, const APInt &GEPIdxFactor,
473 const DataLayout &DL) {
474 for (unsigned Idx = 0; Idx < InputBits; Idx++) {
475 APInt Index = (APInt(InputBits, 1).shl(Idx) * Mul).lshr(Shift) & AndMask;
476 ConstantInt *C = dyn_cast_or_null<ConstantInt>(
477 ConstantFoldLoadFromConst(Table, AccessTy, Index * GEPIdxFactor, DL));
478 if (!C || C->getValue() != Idx)
479 return false;
480 }
481
482 return true;
483}
484
485// Try to recognize table-based ctz implementation.
486// E.g., an example in C (for more cases please see the llvm/tests):
487// int f(unsigned x) {
488// static const char table[32] =
489// {0, 1, 28, 2, 29, 14, 24, 3, 30,
490// 22, 20, 15, 25, 17, 4, 8, 31, 27,
491// 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
492// return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
493// }
494// this can be lowered to `cttz` instruction.
495// There is also a special case when the element is 0.
496//
497 // The (x & -x) isolates the lowest set bit. The multiplier is a de Bruijn
498 // sequence, so each possible position of that bit produces a distinct value
499 // in the top bits after the multiply, and that index into the table gives
500 // the number of trailing zeros in the original number.
501//
502 // Here are some examples of LLVM IR for a 64-bit target:
503//
504// CASE 1:
505// %sub = sub i32 0, %x
506// %and = and i32 %sub, %x
507// %mul = mul i32 %and, 125613361
508// %shr = lshr i32 %mul, 27
509// %idxprom = zext i32 %shr to i64
510// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table, i64 0,
511// i64 %idxprom
512// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
513//
514// CASE 2:
515// %sub = sub i32 0, %x
516// %and = and i32 %sub, %x
517// %mul = mul i32 %and, 72416175
518// %shr = lshr i32 %mul, 26
519// %idxprom = zext i32 %shr to i64
520// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,
521// i64 0, i64 %idxprom
522// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
523//
524// CASE 3:
525// %sub = sub i32 0, %x
526// %and = and i32 %sub, %x
527// %mul = mul i32 %and, 81224991
528// %shr = lshr i32 %mul, 27
529// %idxprom = zext i32 %shr to i64
530// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,
531// i64 0, i64 %idxprom
532// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
533//
534// CASE 4:
535// %sub = sub i64 0, %x
536// %and = and i64 %sub, %x
537// %mul = mul i64 %and, 283881067100198605
538// %shr = lshr i64 %mul, 58
539// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,
540// i64 %shr
541// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
542//
543// All these can be lowered to @llvm.cttz.i32/64 intrinsics.
544 static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
545 LoadInst *LI = dyn_cast<LoadInst>(&I);
546 if (!LI)
547 return false;
548
549 Type *AccessType = LI->getType();
550 if (!AccessType->isIntegerTy())
551 return false;
552
553 auto *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
554 if (!GEP || !GEP->hasNoUnsignedSignedWrap())
555 return false;
556
557 GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
558 if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
559 return false;
560
561 unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
562 APInt ModOffset(BW, 0);
563 SmallMapVector<Value *, APInt, 4> VarOffsets;
564 if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset) ||
565 VarOffsets.size() != 1 || ModOffset != 0)
566 return false;
567 auto [GepIdx, GEPScale] = VarOffsets.front();
568
569 Value *X1;
570 const APInt *MulConst, *ShiftConst, *AndCst = nullptr;
571 // Check that the gep variable index is ((x & -x) * MulConst) >> ShiftConst.
572 // This might be extended to the pointer index type, and if the gep index type
573 // has been replaced with an i8 then a new And (and different ShiftConst) will
574 // be present.
575 auto MatchInner = m_LShr(
576 m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)), m_APInt(MulConst)),
577 m_APInt(ShiftConst));
578 if (!match(GepIdx, m_CastOrSelf(MatchInner)) &&
579 !match(GepIdx, m_CastOrSelf(m_And(MatchInner, m_APInt(AndCst)))))
580 return false;
581
582 unsigned InputBits = X1->getType()->getScalarSizeInBits();
583 if (InputBits != 16 && InputBits != 32 && InputBits != 64 && InputBits != 128)
584 return false;
585
586 if (!GEPScale.isIntN(InputBits) ||
587 !isCTTZTable(GVTable->getInitializer(), *MulConst, *ShiftConst,
588 AndCst ? *AndCst : APInt::getAllOnes(InputBits), AccessType,
589 InputBits, GEPScale.zextOrTrunc(InputBits), DL))
590 return false;
591
592 ConstantInt *ZeroTableElem = cast<ConstantInt>(
593 ConstantFoldLoadFromConst(GVTable->getInitializer(), AccessType, DL));
594 bool DefinedForZero = ZeroTableElem->getZExtValue() == InputBits;
595
596 IRBuilder<> B(LI);
597 ConstantInt *BoolConst = B.getInt1(!DefinedForZero);
598 Type *XType = X1->getType();
599 auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
600 Value *ZExtOrTrunc = nullptr;
601
602 if (DefinedForZero) {
603 ZExtOrTrunc = B.CreateZExtOrTrunc(Cttz, AccessType);
604 } else {
605 // If the value in elem 0 isn't the same as InputBits, we still want to
606 // produce the value from the table.
607 auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0));
608 auto Select = B.CreateSelect(Cmp, B.CreateZExt(ZeroTableElem, XType), Cttz);
609
610 // The true branch of select handles the cttz(0) case, which is rare.
611 if (auto *SelectI = dyn_cast<Instruction>(Select);
612 SelectI && !ProfcheckDisableMetadataFixes) {
613 SelectI->setMetadata(
614 LLVMContext::MD_prof,
615 MDBuilder(SelectI->getContext()).createUnlikelyBranchWeights());
616 }
617
618 // NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target
619 // it should be handled as: `cttz(x) & (typeSize - 1)`.
620
621 ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
622 }
623
624 LI->replaceAllUsesWith(ZExtOrTrunc);
625
626 return true;
627}
628
629/// This is used by foldLoadsRecursive() to capture a Root Load node which is
630/// of type or(load, load) and recursively build the wide load. Also capture the
631/// shift amount, zero extend type and loadSize.
632 struct LoadOps {
633 LoadInst *Root = nullptr;
634 LoadInst *RootInsert = nullptr;
635 bool FoundRoot = false;
636 uint64_t LoadSize = 0;
637 uint64_t Shift = 0;
638 Type *ZextType = nullptr;
639 AAMDNodes AATags;
640 };
641
642 // Identify and merge consecutive loads recursively, which are of the form
643// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
644// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
645static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
646 AliasAnalysis &AA) {
647 uint64_t ShAmt2;
648 Value *X;
649 Instruction *L1, *L2;
650
651 // Go to the last node with loads.
652 if (match(V,
653 m_OneUse(m_c_Or(m_Value(X),
654 m_OneUse(m_ShlOrSelf(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
655 ShAmt2)))))) {
656 if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)
657 // Avoid Partial chain merge.
658 return false;
659 } else
660 return false;
661
662 // Check if the pattern has loads
663 LoadInst *LI1 = LOps.Root;
664 uint64_t ShAmt1 = LOps.Shift;
665 if (LOps.FoundRoot == false &&
666 match(X, m_OneUse(
667 m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(L1))), ShAmt1)))) {
668 LI1 = dyn_cast<LoadInst>(L1);
669 }
670 LoadInst *LI2 = dyn_cast<LoadInst>(L2);
671
672 // Bail if the loads are identical, not simple, or in different address spaces.
673 if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||
674 LI1->getPointerAddressSpace() != LI2->getPointerAddressSpace())
675 return false;
676
677 // Check if Loads come from same BB.
678 if (LI1->getParent() != LI2->getParent())
679 return false;
680
681 // Find the data layout
682 bool IsBigEndian = DL.isBigEndian();
683
684 // Check if loads are consecutive and same size.
685 Value *Load1Ptr = LI1->getPointerOperand();
686 APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
687 Load1Ptr =
688 Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,
689 /* AllowNonInbounds */ true);
690
691 Value *Load2Ptr = LI2->getPointerOperand();
692 APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);
693 Load2Ptr =
694 Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
695 /* AllowNonInbounds */ true);
696
697 // Verify if both loads have same base pointers
698 uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
699 uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
700 if (Load1Ptr != Load2Ptr)
701 return false;
702
703 // Make sure that there are no padding bits.
704 if (!DL.typeSizeEqualsStoreSize(LI1->getType()) ||
705 !DL.typeSizeEqualsStoreSize(LI2->getType()))
706 return false;
707
708 // Alias analysis to check for stores between the loads.
709 LoadInst *Start = LOps.FoundRoot ? LOps.RootInsert : LI1, *End = LI2;
710 MemoryLocation Loc;
711 if (!Start->comesBefore(End)) {
712 std::swap(Start, End);
713 Loc = MemoryLocation::get(End);
714 if (LOps.FoundRoot)
715 Loc = Loc.getWithNewSize(LOps.LoadSize);
716 } else
717 Loc = MemoryLocation::get(End);
718 unsigned NumScanned = 0;
719 for (Instruction &Inst :
720 make_range(Start->getIterator(), End->getIterator())) {
721 if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
722 return false;
723
724 if (++NumScanned > MaxInstrsToScan)
725 return false;
726 }
727
728 // Make sure Load with lower Offset is at LI1
729 bool Reverse = false;
730 if (Offset2.slt(Offset1)) {
731 std::swap(LI1, LI2);
732 std::swap(ShAmt1, ShAmt2);
733 std::swap(Offset1, Offset2);
734 std::swap(Load1Ptr, Load2Ptr);
735 std::swap(LoadSize1, LoadSize2);
736 Reverse = true;
737 }
738
739 // Big endian swap the shifts
740 if (IsBigEndian)
741 std::swap(ShAmt1, ShAmt2);
742
743 // First load is always LI1. This is where we put the new load.
744 // Use the merged load size available from LI1 for forward loads.
745 if (LOps.FoundRoot) {
746 if (!Reverse)
747 LoadSize1 = LOps.LoadSize;
748 else
749 LoadSize2 = LOps.LoadSize;
750 }
751
752 // Verify that the shift amounts and load offsets line up, i.e. that the
753 // loads are consecutive.
754 uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
755 uint64_t PrevSize =
756 DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
757 if ((ShAmt2 - ShAmt1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
758 return false;
759
760 // Update LOps
761 AAMDNodes AATags1 = LOps.AATags;
762 AAMDNodes AATags2 = LI2->getAAMetadata();
763 if (LOps.FoundRoot == false) {
764 LOps.FoundRoot = true;
765 AATags1 = LI1->getAAMetadata();
766 }
767 LOps.LoadSize = LoadSize1 + LoadSize2;
768 LOps.RootInsert = Start;
769
770 // Concatenate the AATags of the Merged Loads.
771 LOps.AATags = AATags1.concat(AATags2);
772
773 LOps.Root = LI1;
774 LOps.Shift = ShAmt1;
775 LOps.ZextType = X->getType();
776 return true;
777}
778
779// For a given BB instruction, evaluate all loads in the chain that form a
780// pattern which suggests that the loads can be combined. The one and only use
781// of the loads is to form a wider load.
782 static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
783 TargetTransformInfo &TTI, AliasAnalysis &AA,
784 const DominatorTree &DT) {
785 // Only consider load chains of scalar values.
786 if (isa<VectorType>(I.getType()))
787 return false;
788
789 LoadOps LOps;
790 if (!foldLoadsRecursive(&I, LOps, DL, AA) || !LOps.FoundRoot)
791 return false;
792
793 IRBuilder<> Builder(&I);
794 LoadInst *NewLoad = nullptr, *LI1 = LOps.Root;
795
796 IntegerType *WiderType = IntegerType::get(I.getContext(), LOps.LoadSize);
797 // TTI based checks if we want to proceed with wider load
798 bool Allowed = TTI.isTypeLegal(WiderType);
799 if (!Allowed)
800 return false;
801
802 unsigned AS = LI1->getPointerAddressSpace();
803 unsigned Fast = 0;
804 Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
805 AS, LI1->getAlign(), &Fast);
806 if (!Allowed || !Fast)
807 return false;
808
809 // Get the Index and Ptr for the new GEP.
810 Value *Load1Ptr = LI1->getPointerOperand();
811 Builder.SetInsertPoint(LOps.RootInsert);
812 if (!DT.dominates(Load1Ptr, LOps.RootInsert)) {
813 APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
814 Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets(
815 DL, Offset1, /* AllowNonInbounds */ true);
816 Load1Ptr = Builder.CreatePtrAdd(Load1Ptr, Builder.getInt(Offset1));
817 }
818 // Generate wider load.
819 NewLoad = Builder.CreateAlignedLoad(WiderType, Load1Ptr, LI1->getAlign(),
820 LI1->isVolatile(), "");
821 NewLoad->takeName(LI1);
822 // Set the New Load AATags Metadata.
823 if (LOps.AATags)
824 NewLoad->setAAMetadata(LOps.AATags);
825
826 Value *NewOp = NewLoad;
827 // Check if zero extend needed.
828 if (LOps.ZextType)
829 NewOp = Builder.CreateZExt(NewOp, LOps.ZextType);
830
831 // Check if shift needed. We need to shift with the amount of load1
832 // shift if not zero.
833 if (LOps.Shift)
834 NewOp = Builder.CreateShl(NewOp, LOps.Shift);
835 I.replaceAllUsesWith(NewOp);
836
837 return true;
838}
839
840/// ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset.
841struct PartStore {
842 Value *PtrBase;
843 APInt PtrOffset;
844 Value *Val;
845 uint64_t ValOffset;
846 uint64_t ValWidth;
847 StoreInst *Store;
848
849 bool isCompatibleWith(const PartStore &Other) const {
850 return PtrBase == Other.PtrBase && Val == Other.Val;
851 }
852
853 bool operator<(const PartStore &Other) const {
854 return PtrOffset.slt(Other.PtrOffset);
855 }
856};
857
858static std::optional<PartStore> matchPartStore(Instruction &I,
859 const DataLayout &DL) {
860 auto *Store = dyn_cast<StoreInst>(&I);
861 if (!Store || !Store->isSimple())
862 return std::nullopt;
863
864 Value *StoredVal = Store->getValueOperand();
865 Type *StoredTy = StoredVal->getType();
866 if (!StoredTy->isIntegerTy() || !DL.typeSizeEqualsStoreSize(StoredTy))
867 return std::nullopt;
868
869 uint64_t ValWidth = StoredTy->getPrimitiveSizeInBits();
870 uint64_t ValOffset;
871 Value *Val;
872 if (!match(StoredVal, m_Trunc(m_LShrOrSelf(m_Value(Val), ValOffset))))
873 return std::nullopt;
874
875 Value *Ptr = Store->getPointerOperand();
876 APInt PtrOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
877 Value *PtrBase = Ptr->stripAndAccumulateConstantOffsets(
878 DL, PtrOffset, /*AllowNonInbounds=*/true);
879 return {{PtrBase, PtrOffset, Val, ValOffset, ValWidth, Store}};
880}
881
882 static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts,
883 unsigned Width, const DataLayout &DL,
884 TargetTransformInfo &TTI) {
885 if (Parts.size() < 2)
886 return false;
887
888 // Check whether combining the stores is profitable.
889 // FIXME: We could generate smaller stores if we can't produce a large one.
890 const PartStore &First = Parts.front();
891 LLVMContext &Ctx = First.Store->getContext();
892 Type *NewTy = Type::getIntNTy(Ctx, Width);
893 unsigned Fast = 0;
894 if (!TTI.isTypeLegal(NewTy) ||
895 !TTI.allowsMisalignedMemoryAccesses(Ctx, Width,
896 First.Store->getPointerAddressSpace(),
897 First.Store->getAlign(), &Fast) ||
898 !Fast)
899 return false;
900
901 // Generate the combined store.
902 IRBuilder<> Builder(First.Store);
903 Value *Val = First.Val;
904 if (First.ValOffset != 0)
905 Val = Builder.CreateLShr(Val, First.ValOffset);
906 Val = Builder.CreateTrunc(Val, NewTy);
907 StoreInst *Store = Builder.CreateAlignedStore(
908 Val, First.Store->getPointerOperand(), First.Store->getAlign());
909
910 AAMDNodes AATags = First.Store->getAAMetadata();
911 for (const PartStore &Part : drop_begin(Parts))
912 AATags = AATags.concat(Part.Store->getAAMetadata());
913 Store->setAAMetadata(AATags);
914
915 // Remove the old stores.
916 for (const PartStore &Part : Parts)
917 Part.Store->eraseFromParent();
918
919 return true;
920}
921
922 static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
923 const DataLayout &DL, TargetTransformInfo &TTI) {
924 if (Parts.size() < 2)
925 return false;
926
927 // We now have multiple parts of the same value stored to the same pointer.
928 // Sort the parts by pointer offset, and make sure they are consistent with
929 // the value offsets. Also check that the value is fully covered without
930 // overlaps.
931 bool Changed = false;
932 llvm::sort(Parts);
933 int64_t LastEndOffsetFromFirst = 0;
934 const PartStore *First = &Parts[0];
935 for (const PartStore &Part : Parts) {
936 APInt PtrOffsetFromFirst = Part.PtrOffset - First->PtrOffset;
937 int64_t ValOffsetFromFirst = Part.ValOffset - First->ValOffset;
938 if (PtrOffsetFromFirst * 8 != ValOffsetFromFirst ||
939 LastEndOffsetFromFirst != ValOffsetFromFirst) {
940 Changed |= mergeConsecutivePartStores(ArrayRef(First, &Part),
941 LastEndOffsetFromFirst, DL, TTI);
942 First = &Part;
943 LastEndOffsetFromFirst = Part.ValWidth;
944 continue;
945 }
946
947 LastEndOffsetFromFirst = ValOffsetFromFirst + Part.ValWidth;
948 }
949
950 Changed |= mergeConsecutivePartStores(ArrayRef(First, Parts.end()),
951 LastEndOffsetFromFirst, DL, TTI);
952 return Changed;
953}
954
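// Illustrative little-endian IR, hand-written rather than taken from the LLVM
// test suite: two byte-wide part stores of the same i32 value merged into one
// i16 store, assuming TTI reports the wide store as legal and fast.
//
//   %lo = trunc i32 %v to i8
//   store i8 %lo, ptr %p
//   %sh = lshr i32 %v, 8
//   %hi = trunc i32 %sh to i8
//   %p1 = getelementptr i8, ptr %p, i64 1
//   store i8 %hi, ptr %p1
//   -->
//   %t  = trunc i32 %v to i16
//   store i16 %t, ptr %p, align 1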
955 static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL,
956 TargetTransformInfo &TTI, AliasAnalysis &AA) {
957 // FIXME: Add big endian support.
958 if (DL.isBigEndian())
959 return false;
960
961 BatchAAResults BatchAA(AA);
962 SmallVector<PartStore, 8> Parts;
963 bool MadeChange = false;
964 for (Instruction &I : make_early_inc_range(BB)) {
965 if (std::optional<PartStore> Part = matchPartStore(I, DL)) {
966 if (Parts.empty() || Part->isCompatibleWith(Parts[0])) {
967 Parts.push_back(std::move(*Part));
968 continue;
969 }
970
971 MadeChange |= mergePartStores(Parts, DL, TTI);
972 Parts.clear();
973 Parts.push_back(std::move(*Part));
974 continue;
975 }
976
977 if (Parts.empty())
978 continue;
979
980 if (I.mayThrow() ||
981 (I.mayReadOrWriteMemory() &&
982 isModOrRefSet(BatchAA.getModRefInfo(
983 &I, MemoryLocation::getBeforeOrAfter(Parts[0].PtrBase))))) {
984 MadeChange |= mergePartStores(Parts, DL, TTI);
985 Parts.clear();
986 continue;
987 }
988 }
989
990 MadeChange |= mergePartStores(Parts, DL, TTI);
991 return MadeChange;
992}
993
994 /// Combine away instructions provided the result stays equivalent when
995 /// compared against 0, i.e. it preserves whether any bits are set.
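// Illustrative IR, hand-written rather than taken from the LLVM test suite:
// a nuw/nsw shift feeding an 'or' that is only compared against zero can be
// dropped, because such a shift cannot change whether the value is zero.
//
//   %s = shl nuw i32 %a, 3
//   %o = or i32 %s, %b
//   %c = icmp eq i32 %o, 0
//   -->
//   %o2 = or i32 %a, %b
//   %c  = icmp eq i32 %o2, 0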
996 static Value *optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder) {
997 auto *I = dyn_cast<Instruction>(V);
998 if (!I || I->getOpcode() != Instruction::Or || !I->hasOneUse())
999 return nullptr;
1000
1001 Value *A;
1002
1003 // Look deeper into the chain of or's, combining away shl (so long as they are
1004 // nuw or nsw).
1005 Value *Op0 = I->getOperand(0);
1006 if (match(Op0, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
1007 m_NUWShl(m_Value(A), m_Value()))))
1008 Op0 = A;
1009 else if (auto *NOp = optimizeShiftInOrChain(Op0, Builder))
1010 Op0 = NOp;
1011
1012 Value *Op1 = I->getOperand(1);
1013 if (match(Op1, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
1014 m_NUWShl(m_Value(A), m_Value()))))
1015 Op1 = A;
1016 else if (auto *NOp = optimizeShiftInOrChain(Op1, Builder))
1017 Op1 = NOp;
1018
1019 if (Op0 != I->getOperand(0) || Op1 != I->getOperand(1))
1020 return Builder.CreateOr(Op0, Op1);
1021 return nullptr;
1022}
1023
1024 static bool foldICmpOrChain(Instruction &I, const DataLayout &DL,
1025 TargetTransformInfo &TTI, AliasAnalysis &AA,
1026 const DominatorTree &DT) {
1027 CmpPredicate Pred;
1028 Value *Op0;
1029 if (!match(&I, m_ICmp(Pred, m_Value(Op0), m_Zero())) ||
1030 !ICmpInst::isEquality(Pred))
1031 return false;
1032
1033 // If the chain of or's matches a load, combine to that before attempting to
1034 // remove shifts.
1035 if (auto OpI = dyn_cast<Instruction>(Op0))
1036 if (OpI->getOpcode() == Instruction::Or)
1037 if (foldConsecutiveLoads(*OpI, DL, TTI, AA, DT))
1038 return true;
1039
1040 IRBuilder<> Builder(&I);
1041 // icmp eq/ne or(shl(a), b), 0 -> icmp eq/ne or(a, b), 0
1042 if (auto *Res = optimizeShiftInOrChain(Op0, Builder)) {
1043 I.replaceAllUsesWith(Builder.CreateICmp(Pred, Res, I.getOperand(1)));
1044 return true;
1045 }
1046
1047 return false;
1048}
1049
1050// Calculate GEP Stride and accumulated const ModOffset. Return Stride and
1051// ModOffset
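// Worked example (hand-written, not from the LLVM sources): for a pointer
// computed as
//   %p = getelementptr inbounds [16 x i32], ptr @g, i64 0, i64 %i
// the only variable index has a byte scale of 4, so the returned stride is 4
// and the accumulated constant offset is 0 (reduced modulo the stride).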
1052static std::pair<APInt, APInt>
1053 getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) {
1054 unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
1055 std::optional<APInt> Stride;
1056 APInt ModOffset(BW, 0);
1057 // Return a minimum gep stride, the greatest common divisor of consecutive gep
1058 // index scales (cf. Bézout's identity).
1059 while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
1060 SmallMapVector<Value *, APInt, 4> VarOffsets;
1061 if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset))
1062 break;
1063
1064 for (auto [V, Scale] : VarOffsets) {
1065 // Only keep a power of two factor for non-inbounds
1066 if (!GEP->hasNoUnsignedSignedWrap())
1067 Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
1068
1069 if (!Stride)
1070 Stride = Scale;
1071 else
1072 Stride = APIntOps::GreatestCommonDivisor(*Stride, Scale);
1073 }
1074
1075 PtrOp = GEP->getPointerOperand();
1076 }
1077
1078 // Check whether pointer arrives back at Global Variable via at least one GEP.
1079 // Even if it doesn't, we can check by alignment.
1080 if (!isa<GlobalVariable>(PtrOp) || !Stride)
1081 return {APInt(BW, 1), APInt(BW, 0)};
1082
1083 // To account for signed GEP indices, reduce the constant offset to its
1084 // remainder modulo the minimum GEP stride (made non-negative below).
1085 ModOffset = ModOffset.srem(*Stride);
1086 if (ModOffset.isNegative())
1087 ModOffset += *Stride;
1088
1089 return {*Stride, ModOffset};
1090}
1091
1092 /// If C is a constant patterned array and all valid loaded results for a given
1093 /// alignment are the same constant, return that constant.
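// Illustrative IR, hand-written rather than taken from the LLVM test suite:
// every in-bounds, naturally aligned load from this global yields the same
// constant, so the load folds to it.
//
//   @g = constant [4 x i32] [i32 7, i32 7, i32 7, i32 7]
//
//   %p = getelementptr inbounds [4 x i32], ptr @g, i64 0, i64 %i
//   %v = load i32, ptr %p, align 4
//   -->
//   %v -> i32 7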
1094 static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
1095 auto *LI = dyn_cast<LoadInst>(&I);
1096 if (!LI || LI->isVolatile())
1097 return false;
1098
1099 // We can only fold the load if it is from a constant global with definitive
1100 // initializer. Skip expensive logic if this is not the case.
1101 auto *PtrOp = LI->getPointerOperand();
1102 auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
1103 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
1104 return false;
1105
1106 // Bail for large initializers in excess of 4K to avoid too many scans.
1107 Constant *C = GV->getInitializer();
1108 uint64_t GVSize = DL.getTypeAllocSize(C->getType());
1109 if (!GVSize || 4096 < GVSize)
1110 return false;
1111
1112 Type *LoadTy = LI->getType();
1113 unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
1114 auto [Stride, ConstOffset] = getStrideAndModOffsetOfGEP(PtrOp, DL);
1115
1116 // Any possible offset is a multiple of the GEP stride, and any valid
1117 // offset is a multiple of the load alignment, so checking only multiples of
1118 // the larger of the two is sufficient to establish that the results are equal.
1119 if (auto LA = LI->getAlign();
1120 LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) {
1121 ConstOffset = APInt(BW, 0);
1122 Stride = APInt(BW, LA.value());
1123 }
1124
1125 Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL);
1126 if (!Ca)
1127 return false;
1128
1129 unsigned E = GVSize - DL.getTypeStoreSize(LoadTy);
1130 for (; ConstOffset.getZExtValue() <= E; ConstOffset += Stride)
1131 if (Ca != ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL))
1132 return false;
1133
1134 I.replaceAllUsesWith(Ca);
1135
1136 return true;
1137}
1138
1139namespace {
1140class StrNCmpInliner {
1141public:
1142 StrNCmpInliner(CallInst *CI, LibFunc Func, DomTreeUpdater *DTU,
1143 const DataLayout &DL)
1144 : CI(CI), Func(Func), DTU(DTU), DL(DL) {}
1145
1146 bool optimizeStrNCmp();
1147
1148private:
1149 void inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Swapped);
1150
1151 CallInst *CI;
1152 LibFunc Func;
1153 DomTreeUpdater *DTU;
1154 const DataLayout &DL;
1155};
1156
1157} // namespace
1158
1159/// First we normalize calls to strncmp/strcmp to the form of
1160/// compare(s1, s2, N), which means comparing first N bytes of s1 and s2
1161/// (without considering '\0').
1162///
1163/// Examples:
1164///
1165/// \code
1166/// strncmp(s, "a", 3) -> compare(s, "a", 2)
1167/// strncmp(s, "abc", 3) -> compare(s, "abc", 3)
1168/// strncmp(s, "a\0b", 3) -> compare(s, "a\0b", 2)
1169/// strcmp(s, "a") -> compare(s, "a", 2)
1170///
1171/// char s2[] = {'a'}
1172/// strncmp(s, s2, 3) -> compare(s, s2, 3)
1173///
1174/// char s2[] = {'a', 'b', 'c', 'd'}
1175/// strncmp(s, s2, 3) -> compare(s, s2, 3)
1176/// \endcode
1177///
1178/// We only handle cases where N and exactly one of s1 and s2 are constant.
1179/// Cases that s1 and s2 are both constant are already handled by the
1180/// instcombine pass.
1181///
1182/// We do not handle cases where N > StrNCmpInlineThreshold.
1183///
1184 /// We also do not handle cases where N < 2, which are already
1185/// handled by the instcombine pass.
1186///
1187bool StrNCmpInliner::optimizeStrNCmp() {
1188 if (StrNCmpInlineThreshold < 2)
1189 return false;
1190
1191 if (!isOnlyUsedInZeroComparison(CI))
1192 return false;
1193
1194 Value *Str1P = CI->getArgOperand(0);
1195 Value *Str2P = CI->getArgOperand(1);
1196 // Should be handled elsewhere.
1197 if (Str1P == Str2P)
1198 return false;
1199
1200 StringRef Str1, Str2;
1201 bool HasStr1 = getConstantStringInfo(Str1P, Str1, /*TrimAtNul=*/false);
1202 bool HasStr2 = getConstantStringInfo(Str2P, Str2, /*TrimAtNul=*/false);
1203 if (HasStr1 == HasStr2)
1204 return false;
1205
1206 // Note that '\0' and characters after it are not trimmed.
1207 StringRef Str = HasStr1 ? Str1 : Str2;
1208 Value *StrP = HasStr1 ? Str2P : Str1P;
1209
1210 size_t Idx = Str.find('\0');
1211 uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1;
1212 if (Func == LibFunc_strncmp) {
1213 if (auto *ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
1214 N = std::min(N, ConstInt->getZExtValue());
1215 else
1216 return false;
1217 }
1218 // Now N means how many bytes we need to compare at most.
1219 if (N > Str.size() || N < 2 || N > StrNCmpInlineThreshold)
1220 return false;
1221
1222 // Cases where StrP has two or more dereferenceable bytes might be better
1223 // optimized elsewhere.
1224 bool CanBeNull = false, CanBeFreed = false;
1225 if (StrP->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed) > 1)
1226 return false;
1227 inlineCompare(StrP, Str, N, HasStr1);
1228 return true;
1229}
1230
1231/// Convert
1232///
1233/// \code
1234/// ret = compare(s1, s2, N)
1235/// \endcode
1236///
1237/// into
1238///
1239/// \code
1240/// ret = (int)s1[0] - (int)s2[0]
1241/// if (ret != 0)
1242/// goto NE
1243/// ...
1244/// ret = (int)s1[N-2] - (int)s2[N-2]
1245/// if (ret != 0)
1246/// goto NE
1247/// ret = (int)s1[N-1] - (int)s2[N-1]
1248/// NE:
1249/// \endcode
1250///
1251/// CFG before and after the transformation:
1252///
1253/// (before)
1254/// BBCI
1255///
1256/// (after)
1257/// BBCI -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBTail
1258/// | ^
1259/// E |
1260/// | |
1261/// BBSubs[1] (sub,icmp) --NE-----+
1262/// ... |
1263/// BBSubs[N-1] (sub) ---------+
1264///
1265void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
1266 bool Swapped) {
1267 auto &Ctx = CI->getContext();
1268 IRBuilder<> B(Ctx);
1269 // We want these instructions to be recognized as inlined instructions for the
1270 // compare call, but we don't have a source location for the definition of
1271 // that function, since we're generating that code now. Because the generated
1272 // code is a viable point for a memory access error, we make the pragmatic
1273 // choice here to directly use CI's location so that we have useful
1274 // attribution for the generated code.
1275 B.SetCurrentDebugLocation(CI->getDebugLoc());
1276
1277 BasicBlock *BBCI = CI->getParent();
1278 BasicBlock *BBTail =
1279 SplitBlock(BBCI, CI, DTU, nullptr, nullptr, BBCI->getName() + ".tail");
1280
1281 SmallVector<BasicBlock *> BBSubs;
1282 for (uint64_t I = 0; I < N; ++I)
1283 BBSubs.push_back(
1284 BasicBlock::Create(Ctx, "sub_" + Twine(I), BBCI->getParent(), BBTail));
1285 BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", BBCI->getParent(), BBTail);
1286
1287 cast<BranchInst>(BBCI->getTerminator())->setSuccessor(0, BBSubs[0]);
1288
1289 B.SetInsertPoint(BBNE);
1290 PHINode *Phi = B.CreatePHI(CI->getType(), N);
1291 B.CreateBr(BBTail);
1292
1293 Value *Base = LHS;
1294 for (uint64_t i = 0; i < N; ++i) {
1295 B.SetInsertPoint(BBSubs[i]);
1296 Value *VL =
1297 B.CreateZExt(B.CreateLoad(B.getInt8Ty(),
1298 B.CreateInBoundsPtrAdd(Base, B.getInt64(i))),
1299 CI->getType());
1300 Value *VR =
1301 ConstantInt::get(CI->getType(), static_cast<unsigned char>(RHS[i]));
1302 Value *Sub = Swapped ? B.CreateSub(VR, VL) : B.CreateSub(VL, VR);
1303 if (i < N - 1) {
1304 BranchInst *CondBrInst = B.CreateCondBr(
1305 B.CreateICmpNE(Sub, ConstantInt::get(CI->getType(), 0)), BBNE,
1306 BBSubs[i + 1]);
1307
1308 Function *F = CI->getFunction();
1309 assert(F && "Instruction does not belong to a function!");
1310 std::optional<Function::ProfileCount> EC = F->getEntryCount();
1311 if (EC && EC->getCount() > 0)
1313 } else {
1314 B.CreateBr(BBNE);
1315 }
1316
1317 Phi->addIncoming(Sub, BBSubs[i]);
1318 }
1319
1320 CI->replaceAllUsesWith(Phi);
1321 CI->eraseFromParent();
1322
1323 if (DTU) {
1324 SmallVector<DominatorTree::UpdateType, 4> Updates;
1325 Updates.push_back({DominatorTree::Insert, BBCI, BBSubs[0]});
1326 for (uint64_t i = 0; i < N; ++i) {
1327 if (i < N - 1)
1328 Updates.push_back({DominatorTree::Insert, BBSubs[i], BBSubs[i + 1]});
1329 Updates.push_back({DominatorTree::Insert, BBSubs[i], BBNE});
1330 }
1331 Updates.push_back({DominatorTree::Insert, BBNE, BBTail});
1332 Updates.push_back({DominatorTree::Delete, BBCI, BBTail});
1333 DTU->applyUpdates(Updates);
1334 }
1335}
1336
1337/// Convert memchr with a small constant string into a switch
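// Illustrative example, hand-written rather than taken from the LLVM test
// suite: with the default memchr-inline-threshold of 3, a call such as
// memchr(s, c, 2) where s is the constant string "ab" is rewritten into a
// switch on the low byte of c, with one case per distinct character
// ('a' -> offset 0, 'b' -> offset 1) feeding a phi that yields either null
// or a pointer to the first match.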
1338 static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
1339 const DataLayout &DL) {
1340 if (isa<Constant>(Call->getArgOperand(1)))
1341 return false;
1342
1343 StringRef Str;
1344 Value *Base = Call->getArgOperand(0);
1345 if (!getConstantStringInfo(Base, Str, /*TrimAtNul=*/false))
1346 return false;
1347
1348 uint64_t N = Str.size();
1349 if (auto *ConstInt = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
1350 uint64_t Val = ConstInt->getZExtValue();
1351 // Ignore the case that n is larger than the size of string.
1352 if (Val > N)
1353 return false;
1354 N = Val;
1355 } else
1356 return false;
1357
1358 if (N > MemChrInlineThreshold)
1359 return false;
1360
1361 BasicBlock *BB = Call->getParent();
1362 BasicBlock *BBNext = SplitBlock(BB, Call, DTU);
1363 IRBuilder<> IRB(BB);
1364 IRB.SetCurrentDebugLocation(Call->getDebugLoc());
1365 IntegerType *ByteTy = IRB.getInt8Ty();
1366 BB->getTerminator()->eraseFromParent();
1367 SwitchInst *SI = IRB.CreateSwitch(
1368 IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
1369 // We can't know the precise weights here, as they would depend on the value
1370 // distribution of Call->getArgOperand(1). So we just mark it as "unknown".
1372 DEBUG_TYPE);
1373 Type *IndexTy = DL.getIndexType(Call->getType());
1374 SmallVector<DominatorTree::UpdateType, 8> Updates;
1375
1376 BasicBlock *BBSuccess = BasicBlock::Create(
1377 Call->getContext(), "memchr.success", BB->getParent(), BBNext);
1378 IRB.SetInsertPoint(BBSuccess);
1379 PHINode *IndexPHI = IRB.CreatePHI(IndexTy, N, "memchr.idx");
1380 Value *FirstOccursLocation = IRB.CreateInBoundsPtrAdd(Base, IndexPHI);
1381 IRB.CreateBr(BBNext);
1382 if (DTU)
1383 Updates.push_back({DominatorTree::Insert, BBSuccess, BBNext});
1384
1385 SmallPtrSet<ConstantInt *, 4> Cases;
1386 for (uint64_t I = 0; I < N; ++I) {
1387 ConstantInt *CaseVal = ConstantInt::get(ByteTy, Str[I]);
1388 if (!Cases.insert(CaseVal).second)
1389 continue;
1390
1391 BasicBlock *BBCase = BasicBlock::Create(Call->getContext(), "memchr.case",
1392 BB->getParent(), BBSuccess);
1393 SI->addCase(CaseVal, BBCase);
1394 IRB.SetInsertPoint(BBCase);
1395 IndexPHI->addIncoming(ConstantInt::get(IndexTy, I), BBCase);
1396 IRB.CreateBr(BBSuccess);
1397 if (DTU) {
1398 Updates.push_back({DominatorTree::Insert, BB, BBCase});
1399 Updates.push_back({DominatorTree::Insert, BBCase, BBSuccess});
1400 }
1401 }
1402
1403 PHINode *PHI =
1404 PHINode::Create(Call->getType(), 2, Call->getName(), BBNext->begin());
1405 PHI->addIncoming(Constant::getNullValue(Call->getType()), BB);
1406 PHI->addIncoming(FirstOccursLocation, BBSuccess);
1407
1408 Call->replaceAllUsesWith(PHI);
1409 Call->eraseFromParent();
1410
1411 if (DTU)
1412 DTU->applyUpdates(Updates);
1413
1414 return true;
1415}
1416
1417 static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
1418 TargetLibraryInfo &TLI, AssumptionCache &AC,
1419 DominatorTree &DT, const DataLayout &DL,
1420 bool &MadeCFGChange) {
1421
1422 auto *CI = dyn_cast<CallInst>(&I);
1423 if (!CI || CI->isNoBuiltin())
1424 return false;
1425
1426 Function *CalledFunc = CI->getCalledFunction();
1427 if (!CalledFunc)
1428 return false;
1429
1430 LibFunc LF;
1431 if (!TLI.getLibFunc(*CalledFunc, LF) ||
1432 !isLibFuncEmittable(CI->getModule(), &TLI, LF))
1433 return false;
1434
1435 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
1436
1437 switch (LF) {
1438 case LibFunc_sqrt:
1439 case LibFunc_sqrtf:
1440 case LibFunc_sqrtl:
1441 return foldSqrt(CI, LF, TTI, TLI, AC, DT);
1442 case LibFunc_strcmp:
1443 case LibFunc_strncmp:
1444 if (StrNCmpInliner(CI, LF, &DTU, DL).optimizeStrNCmp()) {
1445 MadeCFGChange = true;
1446 return true;
1447 }
1448 break;
1449 case LibFunc_memchr:
1450 if (foldMemChr(CI, &DTU, DL)) {
1451 MadeCFGChange = true;
1452 return true;
1453 }
1454 break;
1455 default:;
1456 }
1457 return false;
1458}
1459
1460/// This is the entry point for folds that could be implemented in regular
1461/// InstCombine, but they are separated because they are not expected to
1462/// occur frequently and/or have more than a constant-length pattern match.
1463 static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1464 TargetTransformInfo &TTI, TargetLibraryInfo &TLI,
1465 AliasAnalysis &AA,
1466 AssumptionCache &AC, bool &MadeCFGChange) {
1467 bool MadeChange = false;
1468 for (BasicBlock &BB : F) {
1469 // Ignore unreachable basic blocks.
1470 if (!DT.isReachableFromEntry(&BB))
1471 continue;
1472
1473 const DataLayout &DL = F.getDataLayout();
1474
1475 // Walk the block backwards for efficiency. We're matching a chain of
1476 // use->defs, so we're more likely to succeed by starting from the bottom.
1477 // Also, we want to avoid matching partial patterns.
1478 // TODO: It would be more efficient if we removed dead instructions
1479 // iteratively in this loop rather than waiting until the end.
1480 for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) {
1481 MadeChange |= foldAnyOrAllBitsSet(I);
1482 MadeChange |= foldGuardedFunnelShift(I, DT);
1483 MadeChange |= tryToRecognizePopCount(I);
1484 MadeChange |= tryToFPToSat(I, TTI);
1485 MadeChange |= tryToRecognizeTableBasedCttz(I, DL);
1486 MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
1487 MadeChange |= foldPatternedLoads(I, DL);
1488 MadeChange |= foldICmpOrChain(I, DL, TTI, AA, DT);
1489 // NOTE: This function may erase the instruction `I`, so it needs to be
1490 // called at the end of this sequence; otherwise we may introduce
1491 // bugs.
1492 MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
1493 }
1494
1495 // Do this separately to avoid redundantly scanning stores multiple times.
1496 MadeChange |= foldConsecutiveStores(BB, DL, TTI, AA);
1497 }
1498
1499 // We're done with transforms, so remove dead instructions.
1500 if (MadeChange)
1501 for (BasicBlock &BB : F)
1503
1504 return MadeChange;
1505}
1506
1507/// This is the entry point for all transforms. Pass manager differences are
1508/// handled in the callers of this function.
1509 static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
1510 TargetLibraryInfo &TLI, DominatorTree &DT,
1511 AliasAnalysis &AA, bool &MadeCFGChange) {
1512 bool MadeChange = false;
1513 const DataLayout &DL = F.getDataLayout();
1514 TruncInstCombine TIC(AC, TLI, DL, DT);
1515 MadeChange |= TIC.run(F);
1516 MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, MadeCFGChange);
1517 return MadeChange;
1518}
1519
1520 PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
1521 FunctionAnalysisManager &AM) {
1522 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1523 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1524 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1525 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1526 auto &AA = AM.getResult<AAManager>(F);
1527 bool MadeCFGChange = false;
1528 if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
1529 // No changes, all analyses are preserved.
1530 return PreservedAnalyses::all();
1531 }
1532 // Mark all the analyses that instcombine updates as preserved.
1533 PreservedAnalyses PA;
1534 if (MadeCFGChange)
1535 PA.preserve<DominatorTreeAnalysis>();
1536 else
1537 PA.preserveSet<CFGAnalyses>();
1538 return PA;
1539}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool tryToRecognizePopCount(Instruction &I)
static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT)
Try to replace a mathlib call to sqrt with the LLVM intrinsic.
static bool foldAnyOrAllBitsSet(Instruction &I)
Match patterns that correspond to "any-bits-set" and "all-bits-set".
static cl::opt< unsigned > MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string to " "inline a memchr call."))
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI)
Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and C2 saturate the value of t...
static cl::opt< unsigned > StrNCmpInlineThreshold("strncmp-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string for a builtin string cmp " "call eligible for inlining. The default value is 3."))
static bool matchAndOrChain(Value *V, MaskOps &MOps)
This is a recursive helper for foldAnyOrAllBitsSet() that walks through a chain of 'and' or 'or' inst...
static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU, const DataLayout &DL)
Convert memchr with a small constant string into a switch.
static Value * optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder)
Combine away instructions providing they are still equivalent when compared against 0.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT)
Match a pattern for a bitwise funnel/rotate operation that partially guards against undefined behavio...
static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL)
static bool mergePartStores(SmallVectorImpl< PartStore > &Parts, const DataLayout &DL, TargetTransformInfo &TTI)
static bool mergeConsecutivePartStores(ArrayRef< PartStore > Parts, unsigned Width, const DataLayout &DL, TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, AliasAnalysis &AA)
static bool foldICmpOrChain(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift, const APInt &AndMask, Type *AccessTy, unsigned InputBits, const APInt &GEPIdxFactor, const DataLayout &DL)
static std::optional< PartStore > matchPartStore(Instruction &I, const DataLayout &DL)
static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA)
static std::pair< APInt, APInt > getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL)
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL)
If C is a constant patterned array and all valid loaded results for given alignment are same to a con...
static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, const DataLayout &DL, bool &MadeCFGChange)
static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA, AssumptionCache &AC, bool &MadeCFGChange)
This is the entry point for folds that could be implemented in regular InstCombine,...
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runImpl(Function &F, const TargetLowering &TLI, AssumptionCache *AC)
Definition ExpandFp.cpp:993
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
static MaybeAlign getAlign(Value *Ptr)
static Instruction * matchFunnelShift(Instruction &Or, InstCombinerImpl &IC)
Match UB-safe variants of the funnel shift intrinsic.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file contains the declarations for profiling metadata utility functions.
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
BinaryOperator * Mul
A manager for alias analyses.
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
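The APInt helpers above are the workhorses of the bit-level folds in this file. A minimal, self-contained sketch of how a few of them compose (illustrative only, not code from the pass; the helper name is invented):

#include "llvm/ADT/APInt.h"
using namespace llvm;

// Build a single-bit mask, splat an 8-bit pattern across 32 bits, and query
// the usual bit predicates.
static void apintSketch() {
  APInt Bit7 = APInt::getOneBitSet(32, 7);             // 0x00000080
  APInt Pattern = APInt::getSplat(32, APInt(8, 0xAA)); // 0xAAAAAAAA
  APInt Shifted = Pattern.shl(1);                      // 0x55555554
  bool Covered = Bit7.isSubsetOf(Pattern);             // true: bit 7 is set in the pattern
  (void)Shifted;
  (void)Covered;
}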
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition BasicBlock.h:233
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR changes inside a function.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign information.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
Definition Constants.h:163
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string and methods for querying it.
Definition DataLayout.h:63
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
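A brief sketch of how the dominance queries above are typically used before rewriting a use, and how a newly inserted CFG edge is reported back to the tree (illustrative helper names, standard DominatorTree API only):

#include "llvm/IR/Dominators.h"
#include "llvm/IR/Use.h"
using namespace llvm;

// A definition placed in DefBB may only replace the use U if U is reachable
// from the entry block and dominated by DefBB.
static bool canRewriteUse(const DominatorTree &DT, const BasicBlock *DefBB,
                          const Use &U) {
  return DT.isReachableFromEntry(U) && DT.dominates(DefBB, U);
}

// After adding an edge From->To, tell the tree about it so later queries stay
// correct.
static void recordNewEdge(DominatorTree &DT, BasicBlock *From, BasicBlock *To) {
  DT.applyUpdates({{DominatorTree::Insert, From, To}});
}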
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the program.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2497
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of cases that will be added (for efficient allocation).
Definition IRBuilder.h:1220
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2071
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateInBoundsPtrAdd(Value *Ptr, Value *Offset, const Twine &Name="")
Definition IRBuilder.h:2044
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Definition IRBuilder.h:2783
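As a small illustration of the IRBuilder calls listed above (CreatePHI, CreateTrunc, getFirstInsertionPt), here is a hedged sketch that merges two incoming integer values in a join block and truncates the result to i8; the function and variable names are invented for the example:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Assumes Left/Right are integer values of the same (wider than i8) type that
// flow in from LeftBB/RightBB respectively.
static Value *emitMergedTrunc(BasicBlock *JoinBB, BasicBlock *LeftBB,
                              Value *Left, BasicBlock *RightBB, Value *Right) {
  IRBuilder<> B(JoinBB, JoinBB->getFirstInsertionPt());
  PHINode *Merged = B.CreatePHI(Left->getType(), /*NumReservedValues=*/2);
  Merged->addIncoming(Left, LeftBB);
  Merged->addIncoming(Right, RightBB);
  return B.CreateTrunc(Merged, B.getInt8Ty());
}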
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
bool isSimple() const
LLVM_ABI MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards false destination.
Definition MDBuilder.cpp:48
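A quick sketch of how createUnlikelyBranchWeights() is typically attached to a freshly created conditional branch (illustrative; the helper name is not from this file):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
using namespace llvm;

// Annotate Br as strongly biased towards its false successor.
static void markUnlikelyTaken(BranchInst *Br) {
  MDBuilder MDB(Br->getContext());
  Br->setMetadata(LLVMContext::MD_prof, MDB.createUnlikelyBranchWeights());
}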
size_type size() const
Definition MapVector.h:56
std::pair< KeyT, ValueT > & front()
Definition MapVector.h:79
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underlying object.
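The MemoryLocation and BatchAAResults entries above are used together when asking whether an intervening instruction can write the memory a load reads; a hedged sketch of that kind of query (names invented):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Returns true if Inst may modify the memory read by LI.
static bool mayClobberLoad(BatchAAResults &BAA, Instruction *Inst,
                           LoadInst *LI) {
  MemoryLocation Loc = MemoryLocation::get(LI);
  return isModSet(BAA.getModRefInfo(Inst, Loc));
}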
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have (use 0 if you really have no idea).
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
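The PreservedAnalyses API above is what a function pass returns from run(); a hedged sketch of the usual shape, where Changed/MadeCFGChange are placeholder flags and the exact set preserved is only illustrative:

#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

static PreservedAnalyses reportResult(bool Changed, bool MadeCFGChange) {
  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  if (MadeCFGChange)
    PA.preserve<DominatorTreeAnalysis>(); // kept valid via explicit DT updates
  else
    PA.preserveSet<CFGAnalyses>();        // no CFG edits at all
  return PA;
}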
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
static constexpr size_t npos
Definition StringRef.h:57
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_RecipThroughput
Reciprocal throughput.
@ None
The cast is not used with a load/store of any kind.
bool run(Function &F)
Perform TruncInst pattern optimization on given function.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:881
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
const ParentTy * getParent() const
Definition ilist_node.h:34
CallInst * Call
Changed
#define UINT64_MAX
Definition DataTypes.h:77
Abstract Attribute helper functions.
Definition Attributor.h:165
LLVM_ABI APInt GreatestCommonDivisor(APInt A, APInt B)
Compute GCD of two unsigned APInt values.
Definition APInt.cpp:798
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
ShiftLike_match< LHS, Instruction::LShr > m_LShrOrSelf(const LHS &L, uint64_t &R)
Matches lshr L, ConstShAmt or L itself (R will be set to zero in this case).
match_combine_or< CastInst_match< OpTy, CastInst >, OpTy > m_CastOrSelf(const OpTy &Op)
Matches any cast or self. Used to ignore casts.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
ShiftLike_match< LHS, Instruction::Shl > m_ShlOrSelf(const LHS &L, uint64_t &R)
Matches shl L, ConstShAmt or L itself (R will be set to zero in this case).
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match() expression.
specific_bbval m_SpecificBB(BasicBlock *BB)
Match a specific basic block value.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoSignedWrap > m_NSWShl(const LHS &L, const RHS &R)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
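The PatternMatch combinators above all feed the single match() entry point; a hedged, self-contained sketch on a toy pattern (recognizing an "(X & 1) == 0" even-check), deliberately unrelated to the folds in this file:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;

// Returns true and binds X if V has the form (X & 1) == 0.
static bool matchIsEvenTest(Value *V, Value *&X) {
  return match(V, m_SpecificICmp(ICmpInst::ICMP_EQ,
                                 m_OneUse(m_c_And(m_Value(X), m_One())),
                                 m_Zero()));
}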
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
LLVM_ABI bool isOnlyUsedInZeroComparison(const Instruction *CxtI)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F, StringRef PassName)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
LLVM_ABI bool getConstantStringInfo(const Value *V, StringRef &Str, bool TrimAtNul=true)
This function computes the length of a null-terminated C string pointed to by V.
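getConstantStringInfo() above is the standard way to pull a compile-time string out of a pointer argument; a hedged one-liner showing its typical use (argument and helper names invented):

#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;

// Returns true and fills Needle if Arg points at a constant C string.
static bool extractConstantString(Value *Arg, StringRef &Needle) {
  return getConstantStringInfo(Arg, Needle, /*TrimAtNul=*/true);
}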
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition STLExtras.h:632
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead instructions.
Definition Local.cpp:721
LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName)
Specify that the branch weights for this terminator cannot be known at compile time.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
LLVM_ABI bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on the target and also that it is represented in the current Module as a Function with the right type.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:754
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isModOrRefSet(const ModRefInfo MRI)
Definition ModRef.h:43
LLVM_ABI Constant * ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL)
Extract value of C at the given Offset reinterpreted as Ty.
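ConstantFoldLoadFromConst() is what lets a load from a constant global initializer at a known offset be replaced by a constant, in the spirit of the foldPatternedLoads entry near the top of this list; a hedged sketch (helper name invented):

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
using namespace llvm;

// Fold a load of type Ty from GV's initializer at ByteOffset, if possible;
// returns nullptr when the fold cannot be done.
static Constant *loadFromInitializer(GlobalVariable *GV, Type *Ty,
                                     uint64_t ByteOffset,
                                     const DataLayout &DL) {
  if (!GV->isConstant() || !GV->hasInitializer())
    return nullptr;
  APInt Offset(DL.getIndexTypeSizeInBits(GV->getType()), ByteOffset);
  return ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL);
}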
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:548
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Sub
Subtraction of integers.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
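SplitBlock() above is the utility that keeps the dominator tree consistent when a block has to be split around newly introduced control flow; a hedged usage sketch (names invented):

#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Split BB so that SplitPt and everything after it move to the returned block;
// DT (if non-null) is updated by SplitBlock itself.
static BasicBlock *splitBefore(BasicBlock *BB, Instruction *SplitPt,
                               DominatorTree *DT) {
  return SplitBlock(BB, SplitPt->getIterator(), DT, /*LI=*/nullptr,
                    /*MSSAU=*/nullptr, "split.cont");
}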
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal.address intrinsics from the specified value, returning the original object being addressed.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0.0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
LoadInst * RootInsert
ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset.
bool operator<(const PartStore &Other) const
bool isCompatibleWith(const PartStore &Other) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
Definition Metadata.h:761
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:257