Go to the documentation of this file.
114 using namespace llvm;
115 using namespace PatternMatch;
117 #define DEBUG_TYPE "nary-reassociate"
129 bool doInitialization(
Module &
M)
override {
156 "Nary reassociation",
false,
false)
166 return new NaryReassociateLegacyPass();
173 auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
174 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
175 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
176 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
F);
177 auto *
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
179 return Impl.runImpl(
F, AC, DT, SE, TLI,
TTI);
208 DL = &
F.getParent()->getDataLayout();
210 bool Changed =
false, ChangedInThisIteration;
212 ChangedInThisIteration = doOneIteration(
F);
213 Changed |= ChangedInThisIteration;
214 }
while (ChangedInThisIteration);
218 bool NaryReassociatePass::doOneIteration(
Function &
F) {
219 bool Changed =
false;
228 const SCEV *OrigSCEV =
nullptr;
229 if (
Instruction *NewI = tryReassociate(&OrigI, OrigSCEV)) {
231 OrigI.replaceAllUsesWith(NewI);
237 const SCEV *NewSCEV = SE->getSCEV(NewI);
259 if (NewSCEV != OrigSCEV)
268 DeadInsts, TLI,
nullptr, [
this](
Value *V) { SE->forgetValue(V); });
273 template <
typename PredT>
275 NaryReassociatePass::matchAndReassociateMinOrMax(
Instruction *
I,
276 const SCEV *&OrigSCEV) {
283 if (
match(
I, MinMaxMatcher)) {
284 OrigSCEV = SE->getSCEV(
I);
285 if (
auto *NewMinMax = dyn_cast_or_null<Instruction>(
286 tryReassociateMinOrMax(
I, MinMaxMatcher,
LHS,
RHS)))
288 if (
auto *NewMinMax = dyn_cast_or_null<Instruction>(
289 tryReassociateMinOrMax(
I, MinMaxMatcher,
RHS,
LHS)))
296 const SCEV *&OrigSCEV) {
298 if (!SE->isSCEVable(
I->getType()))
301 switch (
I->getOpcode()) {
304 OrigSCEV = SE->getSCEV(
I);
305 return tryReassociateBinaryOp(cast<BinaryOperator>(
I));
306 case Instruction::GetElementPtr:
307 OrigSCEV = SE->getSCEV(
I);
308 return tryReassociateGEP(cast<GetElementPtrInst>(
I));
318 if (
I->getType()->isIntegerTy())
319 if ((ResI = matchAndReassociateMinOrMax<umin_pred_ty>(
I, OrigSCEV)) ||
320 (ResI = matchAndReassociateMinOrMax<smin_pred_ty>(
I, OrigSCEV)) ||
321 (ResI = matchAndReassociateMinOrMax<umax_pred_ty>(
I, OrigSCEV)) ||
322 (ResI = matchAndReassociateMinOrMax<smax_pred_ty>(
I, OrigSCEV)))
341 for (
unsigned I = 1,
E =
GEP->getNumOperands();
I !=
E; ++
I, ++GTI) {
343 if (
auto *NewGEP = tryReassociateGEPAtIndex(
GEP,
I - 1,
352 bool NaryReassociatePass::requiresSignExtension(
Value *Index,
354 unsigned PointerSizeInBits =
355 DL->getPointerSizeInBits(
GEP->getType()->getPointerAddressSpace());
356 return cast<IntegerType>(
Index->getType())->getBitWidth() < PointerSizeInBits;
361 unsigned I,
Type *IndexedType) {
362 Value *IndexToSplit =
GEP->getOperand(
I + 1);
363 if (
SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit)) {
364 IndexToSplit = SExt->getOperand(0);
365 }
else if (
ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
368 IndexToSplit = ZExt->getOperand(0);
371 if (
AddOperator *AO = dyn_cast<AddOperator>(IndexToSplit)) {
375 if (requiresSignExtension(IndexToSplit,
GEP) &&
380 Value *
LHS = AO->getOperand(0), *
RHS = AO->getOperand(1);
382 if (
auto *NewGEP = tryReassociateGEPAtIndex(
GEP,
I,
LHS,
RHS, IndexedType))
387 tryReassociateGEPAtIndex(
GEP,
I,
RHS,
LHS, IndexedType))
401 for (
Use &Index :
GEP->indices())
402 IndexExprs.push_back(SE->getSCEV(Index));
404 IndexExprs[
I] = SE->getSCEV(
LHS);
407 DL->getTypeSizeInBits(
GEP->getOperand(
I)->getType()).getFixedSize()) {
413 SE->getZeroExtendExpr(IndexExprs[
I],
GEP->getOperand(
I)->getType());
415 const SCEV *CandidateExpr = SE->getGEPExpr(cast<GEPOperator>(
GEP),
418 Value *Candidate = findClosestMatchingDominator(CandidateExpr,
GEP);
419 if (Candidate ==
nullptr)
426 Candidate =
Builder.CreateBitOrPointerCast(Candidate,
GEP->getType());
430 uint64_t IndexedSize =
DL->getTypeAllocSize(IndexedType);
431 Type *ElementType =
GEP->getResultElementType();
432 uint64_t ElementSize =
DL->getTypeAllocSize(ElementType);
447 if (IndexedSize % ElementSize != 0)
451 Type *IntPtrTy =
DL->getIntPtrType(
GEP->getType());
454 if (IndexedSize != ElementSize) {
459 Builder.CreateGEP(
GEP->getResultElementType(), Candidate,
RHS));
468 if (SE->getSCEV(
I)->isZero())
470 if (
auto *NewI = tryReassociateBinaryOp(
LHS,
RHS,
I))
472 if (
auto *NewI = tryReassociateBinaryOp(
RHS,
LHS,
I))
479 Value *
A =
nullptr, *
B =
nullptr;
485 const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(
B);
486 const SCEV *RHSExpr = SE->getSCEV(
RHS);
487 if (BExpr != RHSExpr) {
489 tryReassociatedBinaryOp(getBinarySCEV(
I, AExpr, RHSExpr),
B,
I))
492 if (AExpr != RHSExpr) {
494 tryReassociatedBinaryOp(getBinarySCEV(
I, BExpr, RHSExpr), A,
I))
501 Instruction *NaryReassociatePass::tryReassociatedBinaryOp(
const SCEV *LHSExpr,
506 auto *
LHS = findClosestMatchingDominator(LHSExpr,
I);
511 switch (
I->getOpcode()) {
527 switch (
I->getOpcode()) {
541 switch (
I->getOpcode()) {
543 return SE->getAddExpr(
LHS,
RHS);
545 return SE->getMulExpr(
LHS,
RHS);
553 NaryReassociatePass::findClosestMatchingDominator(
const SCEV *CandidateExpr,
555 auto Pos = SeenExprs.find(CandidateExpr);
556 if (Pos == SeenExprs.end())
559 auto &Candidates = Pos->second;
564 while (!Candidates.empty()) {
568 if (
Value *Candidate = Candidates.back()) {
569 Instruction *CandidateInstruction = cast<Instruction>(Candidate);
570 if (DT->dominates(CandidateInstruction, Dominatee))
571 return CandidateInstruction;
573 Candidates.pop_back();
579 if (std::is_same<smax_pred_ty, typename MaxMinT::PredType>::value)
581 else if (std::is_same<umax_pred_ty, typename MaxMinT::PredType>::value)
583 else if (std::is_same<smin_pred_ty, typename MaxMinT::PredType>::value)
585 else if (std::is_same<umin_pred_ty, typename MaxMinT::PredType>::value)
597 template <
typename MaxMinT>
601 Value *
A =
nullptr, *
B =
nullptr;
610 !(U->hasOneUser() && *U->users().begin() == I);
620 const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1);
622 Instruction *R1MinMax = findClosestMatchingDominator(R1Expr,
I);
627 LLVM_DEBUG(
dbgs() <<
"NARY: Found common sub-expr: " << *R1MinMax <<
"\n");
630 SE->getUnknown(R1MinMax)};
631 const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2);
634 Value *NewMinMax = Expander.expandCodeFor(R2Expr,
I->getType(),
I);
638 <<
"NARY: Inserting: " << *NewMinMax <<
"\n");
642 const SCEV *AExpr = SE->getSCEV(A);
643 const SCEV *BExpr = SE->getSCEV(
B);
644 const SCEV *RHSExpr = SE->getSCEV(
RHS);
646 if (BExpr != RHSExpr) {
648 if (
auto *NewMinMax = tryCombination(A, AExpr,
RHS, RHSExpr,
B, BExpr))
652 if (AExpr != RHSExpr) {
654 if (
auto *NewMinMax = tryCombination(
RHS, RHSExpr,
B, BExpr, A, AExpr))
A set of analyses that are preserved following a run of a transformation pass.
Analysis pass providing the TargetTransformInfo.
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Analysis pass that exposes the ScalarEvolution for a function.
This is an optimization pass for GlobalISel generic memory operations.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Returns true if the give value is known to be non-negative.
@ NeverOverflows
Never overflows.
Value handle that is nullable, but tries to track the Value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class uses information about analyze scalars to rewrite expressions in canonical form.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
The main scalar evolution driver.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
The instances of the Type class are immutable: once they are created, they are never changed.
static SCEVTypes convertToSCEVype(MaxMinT &MM)
gep_type_iterator gep_type_begin(const User *GEP)
void initializeNaryReassociateLegacyPassPass(PassRegistry &)
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
bool match(Val *V, const Pattern &P)
FunctionPass * createNaryReassociatePass()
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
(vector float) vec_cmpeq(*A, *B) C
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Represent the analysis usage information of a pass.
static bool runImpl(const TargetLibraryInfo &TLI, Function &F)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Legacy analysis pass which computes a DominatorTree.
void setName(const Twine &Name)
Change the name of the value.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
This class represents an analyzed expression in the program.
A function analysis which provides an AssumptionCache.
void preserve()
Mark an analysis as preserved.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Type * getIndexedType() const
bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_, ScalarEvolution *SE_, TargetLibraryInfo *TLI_, TargetTransformInfo *TTI_)
INITIALIZE_PASS_BEGIN(NaryReassociateLegacyPass, "nary-reassociate", "Nary reassociation", false, false) INITIALIZE_PASS_END(NaryReassociateLegacyPass
A Module instance is used to store all the information related to an LLVM module.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
This class represents zero extension of integer types.
An immutable pass that tracks lazily created AssumptionCache objects.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
A cache of @llvm.assume calls within a function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Type * getType() const
All values are typed, get the type of this value.
Represents analyses that only rely on functions' control flow.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool isSequential() const
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This class represents a sign extension of integer types.
iterator_range< df_iterator< T > > depth_first(const T &G)
static bool runOnFunction(Function &F, bool PostInlining)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Provides information about what library functions are available for the current target.
Analysis pass which computes a DominatorTree.
void preserveSet()
Mark an analysis set as preserved.
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static bool isGEPFoldable(GetElementPtrInst *GEP, const TargetTransformInfo *TTI)
A container for analyses that lazily runs them and caches their results.
FunctionPass class - This class is used to implement most global optimizations.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
AnalysisUsage & addRequired()
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
void takeName(Value *V)
Transfer the name from V to this value.
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
LLVM Value Representation.
Analysis pass providing the TargetLibraryInfo.
iterator_range< user_iterator > users()
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
A Use represents the edge between a Value definition and its users.
Twine concat(const Twine &Suffix) const