Go to the documentation of this file.
34 using namespace PatternMatch;
36 #define DEBUG_TYPE "aggressive-instcombine"
38 STATISTIC(NumAnyOrAllBitsSet,
"Number of any/all-bits-set patterns folded");
40 "Number of guarded rotates transformed into funnel shifts");
42 "Number of guarded funnel shifts transformed into funnel shifts");
43 STATISTIC(NumPopCountRecognized,
"Number of popcount idioms recognized");
47 cl::desc(
"Max number of instructions to scan for aggressive instcombine."));
77 return Intrinsic::fshl;
87 return Intrinsic::fshr;
99 unsigned FunnelOp = 0, GuardOp = 1;
101 Value *ShVal0, *ShVal1, *ShAmt;
104 (IID == Intrinsic::fshl && ShVal0 != P1) ||
105 (IID == Intrinsic::fshr && ShVal1 != P1)) {
108 (IID == Intrinsic::fshl && ShVal0 != P0) ||
109 (IID == Intrinsic::fshr && ShVal1 != P0))
111 assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
112 "Pattern must match funnel shift left or right");
139 if (ShVal0 == ShVal1)
142 ++NumGuardedFunnelShifts;
146 bool IsFshl = IID == Intrinsic::fshl;
147 if (ShVal0 != ShVal1) {
149 ShVal1 =
Builder.CreateFreeze(ShVal1);
151 ShVal0 =
Builder.CreateFreeze(ShVal0);
181 bool FoundAnd1 =
false;
215 const APInt *BitIndex =
nullptr;
221 MOps.
Root = Candidate;
229 return MOps.
Root == Candidate;
243 bool MatchAllBitsSet;
245 MatchAllBitsSet =
true;
247 MatchAllBitsSet =
false;
251 MaskOps MOps(
I.getType()->getScalarSizeInBits(), MatchAllBitsSet);
252 if (MatchAllBitsSet) {
268 I.replaceAllUsesWith(Zext);
269 ++NumAnyOrAllBitsSet;
285 if (
I.getOpcode() != Instruction::LShr)
288 Type *Ty =
I.getType();
294 if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
303 Value *Op0 =
I.getOperand(0);
304 Value *Op1 =
I.getOperand(1);
320 Value *Root, *SubOp1;
328 I.getModule(), Intrinsic::ctpop,
I.getType());
329 I.replaceAllUsesWith(
Builder.CreateCall(Func, {Root}));
330 ++NumPopCountRecognized;
351 const APInt *MinC, *MaxC;
361 if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
364 Type *IntTy =
I.getType();
365 Type *FpTy =
In->getType();
368 if (
auto *VecTy = dyn_cast<VectorType>(IntTy))
390 if (SatCost >= MinMaxCost)
397 I.replaceAllUsesWith(
Builder.CreateSExt(Sat, IntTy));
407 auto *Call = dyn_cast<CallInst>(&
I);
416 if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
425 Type *Ty = Call->getType();
426 Value *
Arg = Call->getArgOperand(0);
431 Builder.setFastMathFlags(Call->getFastMathFlags());
435 I.replaceAllUsesWith(NewSqrt);
452 if (Length < InputBits || Length > InputBits * 2)
456 unsigned Matched = 0;
460 if (Element >= InputBits)
467 if ((((
Mul << Element) &
Mask.getZExtValue()) >>
Shift) ==
i)
471 return Matched == InputBits;
534 if (!
GEP || !
GEP->isInBounds() ||
GEP->getNumIndices() != 2)
537 if (!
GEP->getSourceElementType()->isArrayTy())
540 uint64_t ArraySize =
GEP->getSourceElementType()->getArrayNumElements();
541 if (ArraySize != 32 && ArraySize != 64)
556 Value *Idx2 = std::next(
GEP->idx_begin())->get();
568 if (InputBits != 32 && InputBits != 64)
572 if (InputBits -
Log2_32(InputBits) != ShiftConst &&
573 InputBits -
Log2_32(InputBits) - 1 != ShiftConst)
576 if (!
isCTTZTable(*ConstData, MulConst, ShiftConst, InputBits))
580 bool DefinedForZero = ZeroTableElem == InputBits;
585 auto Cttz =
B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
586 Value *ZExtOrTrunc =
nullptr;
588 if (DefinedForZero) {
589 ZExtOrTrunc =
B.CreateZExtOrTrunc(Cttz, AccessType);
600 ZExtOrTrunc =
B.CreateZExtOrTrunc(
Select, AccessType);
614 bool FoundRoot =
false;
626 Value *ShAmt2 =
nullptr;
650 LI1 = dyn_cast<LoadInst>(L1);
664 bool IsBigEndian =
DL.isBigEndian();
682 if (Load1Ptr != Load2Ptr || LoadSize1 != LoadSize2)
692 if (!Start->comesBefore(End)) {
699 unsigned NumScanned = 0;
701 make_range(Start->getIterator(), End->getIterator())) {
709 bool Reverse =
false;
710 if (Offset2.
slt(Offset1)) {
742 uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
745 if ((Shift2 - Shift1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
755 LOps.
LoadSize = LoadSize1 + LoadSize2;
773 if (isa<VectorType>(
I.getType()))
789 unsigned AS = LI1->getPointerAddressSpace();
792 AS, LI1->getAlign(), &Fast);
797 Instruction *Inst = dyn_cast<Instruction>(LI1->getPointerOperand());
803 Value *Load1Ptr = LI1->getPointerOperand();
806 NewLoad =
Builder.CreateAlignedLoad(WiderType, NewPtr, LI1->getAlign(),
807 LI1->isVolatile(),
"");
813 Value *NewOp = NewLoad;
822 I.replaceAllUsesWith(NewOp);
833 bool MadeChange =
false;
873 bool MadeChange =
false;
876 MadeChange |= TIC.
run(
F);
A set of analyses that are preserved following a run of a transformation pass.
A manager for alias analyses.
Analysis pass providing the TargetTransformInfo.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
This is an optimization pass for GlobalISel generic memory operations.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
A parsed version of the target data layout string, and methods for querying it.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
const Function * getParent() const
Return the enclosing method, or null if none.
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT)
Match a pattern for a bitwise funnel/rotate operation that partially guards against undefined behavio...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
uint64_t getElementAsInteger(unsigned i) const
If this is a sequential container of integers (of any size), return the specified element in the low ...
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, AliasAnalysis &AA)
bool isModSet(const ModRefInfo MRI)
MemoryLocation getWithNewSize(LocationSize NewSize) const
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static bool tryToRecognizePopCount(Instruction &I)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr) const
Accumulate the constant offset this value has compared to a base pointer.
specific_bbval m_SpecificBB(BasicBlock *BB)
Match a specific basic block value.
Value * getPointerOperand()
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
bool run(Function &F)
Perform TruncInst pattern optimization on given function.
bool hasInitializer() const
Definitions have initializers, declarations don't.
LLVM Basic Block Representation.
OneUse_match< T > m_OneUse(const T &SubPattern)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
This is the shared class of boolean and integer constants.
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
@ And
Bitwise or logical AND of integers.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool match(Val *V, const Pattern &P)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Try to replace a mathlib call to sqrt with the LLVM intrinsic.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Class to represent integer types.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
uint64_t getZExtValue() const
Get zero extended value.
STATISTIC(NumFunctions, "Total number of functions")
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static bool matchAndOrChain(Value *V, MaskOps &MOps)
This is a recursive helper for foldAnyOrAllBitsSet() that walks through a chain of 'and' or 'or' inst...
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool slt(const APInt &RHS) const
Signed less than comparison.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, DominatorTree &DT, AliasAnalysis &AA)
This is the entry point for all transforms.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
static Instruction * matchFunnelShift(Instruction &Or, InstCombinerImpl &IC)
Match UB-safe variants of the funnel shift intrinsic.
This is an important base class in LLVM.
MaskOps(unsigned BitWidth, bool MatchAnds)
A function analysis which provides an AssumptionCache.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
An array constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
initializer< Ty > init(const Ty &Val)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI)
Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and C2 saturate the value of t...
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
static bool tryToRecognizeTableBasedCttz(Instruction &I)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
A Module instance is used to store all the information related to an LLVM module.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Class for arbitrary precision integers.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
A cache of @llvm.assume calls within a function.
Type * getType() const
All values are typed, get the type of this value.
Represents analyses that only rely on functions' control flow.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
An instruction for reading from memory.
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and the bit indexes (Mask) nee...
bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
CastClass_match< OpTy, Instruction::FPToSI > m_FPToSI(const OpTy &Op)
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
constexpr unsigned BitWidth
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Provides information about what library functions are available for the current target.
bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
Analysis pass which computes a DominatorTree.
void preserveSet()
Mark an analysis set as preserved.
static bool foldAnyOrAllBitsSet(Instruction &I)
Match patterns that correspond to "any-bits-set" and "all-bits-set".
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
const BasicBlock * getParent() const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA)
This is the entry point for folds that could be implemented in regular InstCombine,...
auto reverse(ContainerTy &&C)
A container for analyses that lazily runs them and caches their results.
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
void takeName(Value *V)
Transfer the name from V to this value.
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Value * getOperand(unsigned i) const
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul, uint64_t Shift, uint64_t InputBits)
LLVM Value Representation.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
Analysis pass providing the TargetLibraryInfo.
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Representation for a specific memory location.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA)
unsigned getNumElements() const
Return the number of elements in the array or vector.