LLVM  15.0.0git
NaryReassociate.cpp
Go to the documentation of this file.
1 //===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass reassociates n-ary add expressions and eliminates the redundancy
10 // exposed by the reassociation.
11 //
12 // A motivating example:
13 //
14 // void foo(int a, int b) {
15 // bar(a + b);
16 // bar((a + 2) + b);
17 // }
18 //
19 // An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
20 // the above code to
21 //
22 // int t = a + b;
23 // bar(t);
24 // bar(t + 2);
25 //
26 // However, the Reassociate pass is unable to do that because it processes each
27 // instruction individually and believes (a + 2) + b is the best form according
28 // to its rank system.
29 //
30 // To address this limitation, NaryReassociate reassociates an expression in a
31 // form that reuses existing instructions. As a result, NaryReassociate can
32 // reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that
33 // (a + b) is computed before.
34 //
35 // NaryReassociate works as follows. For every instruction in the form of (a +
36 // b) + c, it checks whether a + c or b + c is already computed by a dominating
37 // instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +
38 // c) + a and removes the redundancy accordingly. To efficiently look up whether
39 // an expression is computed before, we store each instruction seen and its SCEV
40 // into an SCEV-to-instruction map.
41 //
42 // Although the algorithm pattern-matches only ternary additions, it
43 // automatically handles many >3-ary expressions by walking through the function
44 // in the depth-first order. For example, given
45 //
46 // (a + c) + d
47 // ((a + b) + c) + d
48 //
49 // NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
50 // ((a + c) + b) + d into ((a + c) + d) + b.
51 //
52 // Finally, the above dominator-based algorithm may need to be run multiple
53 // iterations before emitting optimal code. One source of this need is that we
54 // only split an operand when it is used only once. The above algorithm can
55 // eliminate an instruction and decrease the usage count of its operands. As a
56 // result, an instruction that previously had multiple uses may become a
57 // single-use instruction and thus eligible for split consideration. For
58 // example,
59 //
60 // ac = a + c
61 // ab = a + b
62 // abc = ab + c
63 // ab2 = ab + b
64 // ab2c = ab2 + c
65 //
66 // In the first iteration, we cannot reassociate abc to ac+b because ab is used
67 // twice. However, we can reassociate ab2c to abc+b in the first iteration. As a
68 // result, ab2 becomes dead and ab will be used only once in the second
69 // iteration.
70 //
71 // Limitations and TODO items:
72 //
73 // 1) We only considers n-ary adds and muls for now. This should be extended
74 // and generalized.
75 //
76 //===----------------------------------------------------------------------===//
77 
80 #include "llvm/ADT/SmallVector.h"
87 #include "llvm/IR/BasicBlock.h"
88 #include "llvm/IR/Constants.h"
89 #include "llvm/IR/DataLayout.h"
90 #include "llvm/IR/DerivedTypes.h"
91 #include "llvm/IR/Dominators.h"
92 #include "llvm/IR/Function.h"
94 #include "llvm/IR/IRBuilder.h"
95 #include "llvm/IR/InstrTypes.h"
96 #include "llvm/IR/Instruction.h"
97 #include "llvm/IR/Instructions.h"
98 #include "llvm/IR/Module.h"
99 #include "llvm/IR/Operator.h"
100 #include "llvm/IR/PatternMatch.h"
101 #include "llvm/IR/Type.h"
102 #include "llvm/IR/Value.h"
103 #include "llvm/IR/ValueHandle.h"
104 #include "llvm/InitializePasses.h"
105 #include "llvm/Pass.h"
106 #include "llvm/Support/Casting.h"
108 #include "llvm/Transforms/Scalar.h"
111 #include <cassert>
112 #include <cstdint>
113 
114 using namespace llvm;
115 using namespace PatternMatch;
116 
117 #define DEBUG_TYPE "nary-reassociate"
118 
119 namespace {
120 
121 class NaryReassociateLegacyPass : public FunctionPass {
122 public:
123  static char ID;
124 
125  NaryReassociateLegacyPass() : FunctionPass(ID) {
127  }
128 
129  bool doInitialization(Module &M) override {
130  return false;
131  }
132 
133  bool runOnFunction(Function &F) override;
134 
135  void getAnalysisUsage(AnalysisUsage &AU) const override {
144  AU.setPreservesCFG();
145  }
146 
147 private:
148  NaryReassociatePass Impl;
149 };
150 
151 } // end anonymous namespace
152 
154 
155 INITIALIZE_PASS_BEGIN(NaryReassociateLegacyPass, "nary-reassociate",
156  "Nary reassociation", false, false)
162 INITIALIZE_PASS_END(NaryReassociateLegacyPass, "nary-reassociate",
164 
166  return new NaryReassociateLegacyPass();
167 }
168 
170  if (skipFunction(F))
171  return false;
172 
173  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
174  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
175  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
176  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
177  auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
178 
179  return Impl.runImpl(F, AC, DT, SE, TLI, TTI);
180 }
181 
184  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
185  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
186  auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
187  auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
188  auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
189 
190  if (!runImpl(F, AC, DT, SE, TLI, TTI))
191  return PreservedAnalyses::all();
192 
194  PA.preserveSet<CFGAnalyses>();
196  return PA;
197 }
198 
200  DominatorTree *DT_, ScalarEvolution *SE_,
201  TargetLibraryInfo *TLI_,
202  TargetTransformInfo *TTI_) {
203  AC = AC_;
204  DT = DT_;
205  SE = SE_;
206  TLI = TLI_;
207  TTI = TTI_;
208  DL = &F.getParent()->getDataLayout();
209 
210  bool Changed = false, ChangedInThisIteration;
211  do {
212  ChangedInThisIteration = doOneIteration(F);
213  Changed |= ChangedInThisIteration;
214  } while (ChangedInThisIteration);
215  return Changed;
216 }
217 
218 bool NaryReassociatePass::doOneIteration(Function &F) {
219  bool Changed = false;
220  SeenExprs.clear();
221  // Process the basic blocks in a depth first traversal of the dominator
222  // tree. This order ensures that all bases of a candidate are in Candidates
223  // when we process it.
225  for (const auto Node : depth_first(DT)) {
226  BasicBlock *BB = Node->getBlock();
227  for (Instruction &OrigI : *BB) {
228  const SCEV *OrigSCEV = nullptr;
229  if (Instruction *NewI = tryReassociate(&OrigI, OrigSCEV)) {
230  Changed = true;
231  OrigI.replaceAllUsesWith(NewI);
232 
233  // Add 'OrigI' to the list of dead instructions.
234  DeadInsts.push_back(WeakTrackingVH(&OrigI));
235  // Add the rewritten instruction to SeenExprs; the original
236  // instruction is deleted.
237  const SCEV *NewSCEV = SE->getSCEV(NewI);
238  SeenExprs[NewSCEV].push_back(WeakTrackingVH(NewI));
239 
240  // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
241  // is equivalent to I. However, ScalarEvolution::getSCEV may
242  // weaken nsw causing NewSCEV not to equal OldSCEV. For example,
243  // suppose we reassociate
244  // I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4
245  // to
246  // NewI = &a[sext(i)] + sext(j).
247  //
248  // ScalarEvolution computes
249  // getSCEV(I) = a + 4 * sext(i + j)
250  // getSCEV(newI) = a + 4 * sext(i) + 4 * sext(j)
251  // which are different SCEVs.
252  //
253  // To alleviate this issue of ScalarEvolution not always capturing
254  // equivalence, we add I to SeenExprs[OldSCEV] as well so that we can
255  // map both SCEV before and after tryReassociate(I) to I.
256  //
257  // This improvement is exercised in @reassociate_gep_nsw in
258  // nary-gep.ll.
259  if (NewSCEV != OrigSCEV)
260  SeenExprs[OrigSCEV].push_back(WeakTrackingVH(NewI));
261  } else if (OrigSCEV)
262  SeenExprs[OrigSCEV].push_back(WeakTrackingVH(&OrigI));
263  }
264  }
265  // Delete all dead instructions from 'DeadInsts'.
266  // Please note ScalarEvolution is updated along the way.
268  DeadInsts, TLI, nullptr, [this](Value *V) { SE->forgetValue(V); });
269 
270  return Changed;
271 }
272 
273 template <typename PredT>
274 Instruction *
275 NaryReassociatePass::matchAndReassociateMinOrMax(Instruction *I,
276  const SCEV *&OrigSCEV) {
277  Value *LHS = nullptr;
278  Value *RHS = nullptr;
279 
280  auto MinMaxMatcher =
282  m_Value(LHS), m_Value(RHS));
283  if (match(I, MinMaxMatcher)) {
284  OrigSCEV = SE->getSCEV(I);
285  if (auto *NewMinMax = dyn_cast_or_null<Instruction>(
286  tryReassociateMinOrMax(I, MinMaxMatcher, LHS, RHS)))
287  return NewMinMax;
288  if (auto *NewMinMax = dyn_cast_or_null<Instruction>(
289  tryReassociateMinOrMax(I, MinMaxMatcher, RHS, LHS)))
290  return NewMinMax;
291  }
292  return nullptr;
293 }
294 
295 Instruction *NaryReassociatePass::tryReassociate(Instruction * I,
296  const SCEV *&OrigSCEV) {
297 
298  if (!SE->isSCEVable(I->getType()))
299  return nullptr;
300 
301  switch (I->getOpcode()) {
302  case Instruction::Add:
303  case Instruction::Mul:
304  OrigSCEV = SE->getSCEV(I);
305  return tryReassociateBinaryOp(cast<BinaryOperator>(I));
306  case Instruction::GetElementPtr:
307  OrigSCEV = SE->getSCEV(I);
308  return tryReassociateGEP(cast<GetElementPtrInst>(I));
309  default:
310  break;
311  }
312 
313  // Try to match signed/unsigned Min/Max.
314  Instruction *ResI = nullptr;
315  // TODO: Currently min/max reassociation is restricted to integer types only
316  // due to use of SCEVExpander which my introduce incompatible forms of min/max
317  // for pointer types.
318  if (I->getType()->isIntegerTy())
319  if ((ResI = matchAndReassociateMinOrMax<umin_pred_ty>(I, OrigSCEV)) ||
320  (ResI = matchAndReassociateMinOrMax<smin_pred_ty>(I, OrigSCEV)) ||
321  (ResI = matchAndReassociateMinOrMax<umax_pred_ty>(I, OrigSCEV)) ||
322  (ResI = matchAndReassociateMinOrMax<smax_pred_ty>(I, OrigSCEV)))
323  return ResI;
324 
325  return nullptr;
326 }
327 
329  const TargetTransformInfo *TTI) {
330  SmallVector<const Value *, 4> Indices(GEP->indices());
331  return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
332  Indices) == TargetTransformInfo::TCC_Free;
333 }
334 
335 Instruction *NaryReassociatePass::tryReassociateGEP(GetElementPtrInst *GEP) {
336  // Not worth reassociating GEP if it is foldable.
337  if (isGEPFoldable(GEP, TTI))
338  return nullptr;
339 
341  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
342  if (GTI.isSequential()) {
343  if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I - 1,
344  GTI.getIndexedType())) {
345  return NewGEP;
346  }
347  }
348  }
349  return nullptr;
350 }
351 
352 bool NaryReassociatePass::requiresSignExtension(Value *Index,
354  unsigned PointerSizeInBits =
355  DL->getPointerSizeInBits(GEP->getType()->getPointerAddressSpace());
356  return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
357 }
358 
360 NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
361  unsigned I, Type *IndexedType) {
362  Value *IndexToSplit = GEP->getOperand(I + 1);
363  if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit)) {
364  IndexToSplit = SExt->getOperand(0);
365  } else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
366  // zext can be treated as sext if the source is non-negative.
367  if (isKnownNonNegative(ZExt->getOperand(0), *DL, 0, AC, GEP, DT))
368  IndexToSplit = ZExt->getOperand(0);
369  }
370 
371  if (AddOperator *AO = dyn_cast<AddOperator>(IndexToSplit)) {
372  // If the I-th index needs sext and the underlying add is not equipped with
373  // nsw, we cannot split the add because
374  // sext(LHS + RHS) != sext(LHS) + sext(RHS).
375  if (requiresSignExtension(IndexToSplit, GEP) &&
376  computeOverflowForSignedAdd(AO, *DL, AC, GEP, DT) !=
378  return nullptr;
379 
380  Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
381  // IndexToSplit = LHS + RHS.
382  if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I, LHS, RHS, IndexedType))
383  return NewGEP;
384  // Symmetrically, try IndexToSplit = RHS + LHS.
385  if (LHS != RHS) {
386  if (auto *NewGEP =
387  tryReassociateGEPAtIndex(GEP, I, RHS, LHS, IndexedType))
388  return NewGEP;
389  }
390  }
391  return nullptr;
392 }
393 
395 NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
396  unsigned I, Value *LHS,
397  Value *RHS, Type *IndexedType) {
398  // Look for GEP's closest dominator that has the same SCEV as GEP except that
399  // the I-th index is replaced with LHS.
400  SmallVector<const SCEV *, 4> IndexExprs;
401  for (Use &Index : GEP->indices())
402  IndexExprs.push_back(SE->getSCEV(Index));
403  // Replace the I-th index with LHS.
404  IndexExprs[I] = SE->getSCEV(LHS);
405  if (isKnownNonNegative(LHS, *DL, 0, AC, GEP, DT) &&
406  DL->getTypeSizeInBits(LHS->getType()).getFixedSize() <
407  DL->getTypeSizeInBits(GEP->getOperand(I)->getType()).getFixedSize()) {
408  // Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
409  // zext if the source operand is proved non-negative. We should do that
410  // consistently so that CandidateExpr more likely appears before. See
411  // @reassociate_gep_assume for an example of this canonicalization.
412  IndexExprs[I] =
413  SE->getZeroExtendExpr(IndexExprs[I], GEP->getOperand(I)->getType());
414  }
415  const SCEV *CandidateExpr = SE->getGEPExpr(cast<GEPOperator>(GEP),
416  IndexExprs);
417 
418  Value *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
419  if (Candidate == nullptr)
420  return nullptr;
421 
423  // Candidate does not necessarily have the same pointer type as GEP. Use
424  // bitcast or pointer cast to make sure they have the same type, so that the
425  // later RAUW doesn't complain.
426  Candidate = Builder.CreateBitOrPointerCast(Candidate, GEP->getType());
427  assert(Candidate->getType() == GEP->getType());
428 
429  // NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
430  uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
431  Type *ElementType = GEP->getResultElementType();
432  uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
433  // Another less rare case: because I is not necessarily the last index of the
434  // GEP, the size of the type at the I-th index (IndexedSize) is not
435  // necessarily divisible by ElementSize. For example,
436  //
437  // #pragma pack(1)
438  // struct S {
439  // int a[3];
440  // int64 b[8];
441  // };
442  // #pragma pack()
443  //
444  // sizeof(S) = 100 is indivisible by sizeof(int64) = 8.
445  //
446  // TODO: bail out on this case for now. We could emit uglygep.
447  if (IndexedSize % ElementSize != 0)
448  return nullptr;
449 
450  // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0])));
451  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
452  if (RHS->getType() != IntPtrTy)
453  RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy);
454  if (IndexedSize != ElementSize) {
455  RHS = Builder.CreateMul(
456  RHS, ConstantInt::get(IntPtrTy, IndexedSize / ElementSize));
457  }
458  GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(
459  Builder.CreateGEP(GEP->getResultElementType(), Candidate, RHS));
460  NewGEP->setIsInBounds(GEP->isInBounds());
461  NewGEP->takeName(GEP);
462  return NewGEP;
463 }
464 
465 Instruction *NaryReassociatePass::tryReassociateBinaryOp(BinaryOperator *I) {
466  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
467  // There is no need to reassociate 0.
468  if (SE->getSCEV(I)->isZero())
469  return nullptr;
470  if (auto *NewI = tryReassociateBinaryOp(LHS, RHS, I))
471  return NewI;
472  if (auto *NewI = tryReassociateBinaryOp(RHS, LHS, I))
473  return NewI;
474  return nullptr;
475 }
476 
477 Instruction *NaryReassociatePass::tryReassociateBinaryOp(Value *LHS, Value *RHS,
478  BinaryOperator *I) {
479  Value *A = nullptr, *B = nullptr;
480  // To be conservative, we reassociate I only when it is the only user of (A op
481  // B).
482  if (LHS->hasOneUse() && matchTernaryOp(I, LHS, A, B)) {
483  // I = (A op B) op RHS
484  // = (A op RHS) op B or (B op RHS) op A
485  const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
486  const SCEV *RHSExpr = SE->getSCEV(RHS);
487  if (BExpr != RHSExpr) {
488  if (auto *NewI =
489  tryReassociatedBinaryOp(getBinarySCEV(I, AExpr, RHSExpr), B, I))
490  return NewI;
491  }
492  if (AExpr != RHSExpr) {
493  if (auto *NewI =
494  tryReassociatedBinaryOp(getBinarySCEV(I, BExpr, RHSExpr), A, I))
495  return NewI;
496  }
497  }
498  return nullptr;
499 }
500 
501 Instruction *NaryReassociatePass::tryReassociatedBinaryOp(const SCEV *LHSExpr,
502  Value *RHS,
503  BinaryOperator *I) {
504  // Look for the closest dominator LHS of I that computes LHSExpr, and replace
505  // I with LHS op RHS.
506  auto *LHS = findClosestMatchingDominator(LHSExpr, I);
507  if (LHS == nullptr)
508  return nullptr;
509 
510  Instruction *NewI = nullptr;
511  switch (I->getOpcode()) {
512  case Instruction::Add:
513  NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
514  break;
515  case Instruction::Mul:
516  NewI = BinaryOperator::CreateMul(LHS, RHS, "", I);
517  break;
518  default:
519  llvm_unreachable("Unexpected instruction.");
520  }
521  NewI->takeName(I);
522  return NewI;
523 }
524 
525 bool NaryReassociatePass::matchTernaryOp(BinaryOperator *I, Value *V,
526  Value *&Op1, Value *&Op2) {
527  switch (I->getOpcode()) {
528  case Instruction::Add:
529  return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
530  case Instruction::Mul:
531  return match(V, m_Mul(m_Value(Op1), m_Value(Op2)));
532  default:
533  llvm_unreachable("Unexpected instruction.");
534  }
535  return false;
536 }
537 
538 const SCEV *NaryReassociatePass::getBinarySCEV(BinaryOperator *I,
539  const SCEV *LHS,
540  const SCEV *RHS) {
541  switch (I->getOpcode()) {
542  case Instruction::Add:
543  return SE->getAddExpr(LHS, RHS);
544  case Instruction::Mul:
545  return SE->getMulExpr(LHS, RHS);
546  default:
547  llvm_unreachable("Unexpected instruction.");
548  }
549  return nullptr;
550 }
551 
552 Instruction *
553 NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr,
554  Instruction *Dominatee) {
555  auto Pos = SeenExprs.find(CandidateExpr);
556  if (Pos == SeenExprs.end())
557  return nullptr;
558 
559  auto &Candidates = Pos->second;
560  // Because we process the basic blocks in pre-order of the dominator tree, a
561  // candidate that doesn't dominate the current instruction won't dominate any
562  // future instruction either. Therefore, we pop it out of the stack. This
563  // optimization makes the algorithm O(n).
564  while (!Candidates.empty()) {
565  // Candidates stores WeakTrackingVHs, so a candidate can be nullptr if it's
566  // removed
567  // during rewriting.
568  if (Value *Candidate = Candidates.back()) {
569  Instruction *CandidateInstruction = cast<Instruction>(Candidate);
570  if (DT->dominates(CandidateInstruction, Dominatee))
571  return CandidateInstruction;
572  }
573  Candidates.pop_back();
574  }
575  return nullptr;
576 }
577 
578 template <typename MaxMinT> static SCEVTypes convertToSCEVype(MaxMinT &MM) {
579  if (std::is_same<smax_pred_ty, typename MaxMinT::PredType>::value)
580  return scSMaxExpr;
581  else if (std::is_same<umax_pred_ty, typename MaxMinT::PredType>::value)
582  return scUMaxExpr;
583  else if (std::is_same<smin_pred_ty, typename MaxMinT::PredType>::value)
584  return scSMinExpr;
585  else if (std::is_same<umin_pred_ty, typename MaxMinT::PredType>::value)
586  return scUMinExpr;
587 
588  llvm_unreachable("Can't convert MinMax pattern to SCEV type");
589  return scUnknown;
590 }
591 
592 // Parameters:
593 // I - instruction matched by MaxMinMatch matcher
594 // MaxMinMatch - min/max idiom matcher
595 // LHS - first operand of I
596 // RHS - second operand of I
597 template <typename MaxMinT>
598 Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I,
599  MaxMinT MaxMinMatch,
600  Value *LHS, Value *RHS) {
601  Value *A = nullptr, *B = nullptr;
602  MaxMinT m_MaxMin(m_Value(A), m_Value(B));
603 
604  if (LHS->hasNUsesOrMore(3) ||
605  // The optimization is profitable only if LHS can be removed in the end.
606  // In other words LHS should be used (directly or indirectly) by I only.
608  [&](auto *U) {
609  return U != I &&
610  !(U->hasOneUser() && *U->users().begin() == I);
611  }) ||
612  !match(LHS, m_MaxMin))
613  return nullptr;
614 
615  auto tryCombination = [&](Value *A, const SCEV *AExpr, Value *B,
616  const SCEV *BExpr, Value *C,
617  const SCEV *CExpr) -> Value * {
618  SmallVector<const SCEV *, 2> Ops1{BExpr, AExpr};
619  const SCEVTypes SCEVType = convertToSCEVype(m_MaxMin);
620  const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1);
621 
622  Instruction *R1MinMax = findClosestMatchingDominator(R1Expr, I);
623 
624  if (!R1MinMax)
625  return nullptr;
626 
627  LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax << "\n");
628 
629  SmallVector<const SCEV *, 2> Ops2{SE->getUnknown(C),
630  SE->getUnknown(R1MinMax)};
631  const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2);
632 
633  SCEVExpander Expander(*SE, *DL, "nary-reassociate");
634  Value *NewMinMax = Expander.expandCodeFor(R2Expr, I->getType(), I);
635  NewMinMax->setName(Twine(I->getName()).concat(".nary"));
636 
637  LLVM_DEBUG(dbgs() << "NARY: Deleting: " << *I << "\n"
638  << "NARY: Inserting: " << *NewMinMax << "\n");
639  return NewMinMax;
640  };
641 
642  const SCEV *AExpr = SE->getSCEV(A);
643  const SCEV *BExpr = SE->getSCEV(B);
644  const SCEV *RHSExpr = SE->getSCEV(RHS);
645 
646  if (BExpr != RHSExpr) {
647  // Try (A op RHS) op B
648  if (auto *NewMinMax = tryCombination(A, AExpr, RHS, RHSExpr, B, BExpr))
649  return NewMinMax;
650  }
651 
652  if (AExpr != RHSExpr) {
653  // Try (RHS op B) op A
654  if (auto *NewMinMax = tryCombination(RHS, RHSExpr, B, BExpr, A, AExpr))
655  return NewMinMax;
656  }
657 
658  return nullptr;
659 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::PatternMatch::MaxMin_match
Definition: PatternMatch.h:1758
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2458
llvm::GetElementPtrInst::setIsInBounds
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Definition: Instructions.cpp:1828
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2115
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::PatternMatch::bind_ty
Definition: PatternMatch.h:707
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::isKnownNonNegative
bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Returns true if the give value is known to be non-negative.
Definition: ValueTracking.cpp:347
llvm::OverflowResult::NeverOverflows
@ NeverOverflows
Never overflows.
llvm::WeakTrackingVH
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::scSMinExpr
@ scSMinExpr
Definition: ScalarEvolutionExpressions.h:52
ScalarEvolutionExpander.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
Scalar.h
llvm::Function
Definition: Function.h:60
NaryReassociate.h
Pass.h
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
GetElementPtrTypeIterator.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:988
ErrorHandling.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::IRBuilder<>
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::SCEVTypes
SCEVTypes
Definition: ScalarEvolutionExpressions.h:38
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
llvm::scSMaxExpr
@ scSMaxExpr
Definition: ScalarEvolutionExpressions.h:50
Operator.h
llvm::scUnknown
@ scUnknown
Definition: ScalarEvolutionExpressions.h:55
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
convertToSCEVype
static SCEVTypes convertToSCEVype(MaxMinT &MM)
Definition: NaryReassociate.cpp:578
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:123
llvm::initializeNaryReassociateLegacyPassPass
void initializeNaryReassociateLegacyPassPass(PassRegistry &)
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
F
#define F(x, y, z)
Definition: MD5.cpp:55
reassociation
nary Nary reassociation
Definition: NaryReassociate.cpp:163
llvm::NaryReassociatePass
Definition: NaryReassociate.h:102
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::computeOverflowForSignedAdd
OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Definition: ValueTracking.cpp:5268
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::createNaryReassociatePass
FunctionPass * createNaryReassociatePass()
Definition: NaryReassociate.cpp:165
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
InstrTypes.h
llvm::NaryReassociatePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: NaryReassociate.cpp:182
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
runImpl
static bool runImpl(const TargetLibraryInfo &TLI, Function &F)
Definition: ReplaceWithVeclib.cpp:176
llvm::scUMaxExpr
@ scUMaxExpr
Definition: ScalarEvolutionExpressions.h:49
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:141
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:372
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:919
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2145
PatternMatch.h
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
BasicBlock.h
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:468
uint64_t
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2514
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::Value::hasNUsesOrMore
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
Definition: Value.cpp:149
llvm::AddOperator
Definition: Operator.h:347
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:929
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition: GetElementPtrTypeIterator.h:70
llvm::TargetTransformInfo::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
Definition: TargetTransformInfo.cpp:204
llvm::NaryReassociatePass::runImpl
bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_, ScalarEvolution *SE_, TargetLibraryInfo *TLI_, TargetTransformInfo *TTI_)
Definition: NaryReassociate.cpp:199
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(NaryReassociateLegacyPass, "nary-reassociate", "Nary reassociation", false, false) INITIALIZE_PASS_END(NaryReassociateLegacyPass
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::ZExtInst
This class represents zero extension of integer types.
Definition: Instructions.h:4819
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:261
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::BinaryOperator
Definition: InstrTypes.h:188
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1612
DataLayout.h
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::generic_gep_type_iterator::isSequential
bool isSequential() const
Definition: GetElementPtrTypeIterator.h:112
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::SExtInst
This class represents a sign extension of integer types.
Definition: Instructions.h:4858
ValueHandle.h
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:230
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
Casting.h
Function.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
ScalarEvolutionExpressions.h
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
Instructions.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
SmallVector.h
Dominators.h
CreateMul
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:243
TargetTransformInfo.h
isGEPFoldable
static bool isGEPFoldable(GetElementPtrInst *GEP, const TargetTransformInfo *TTI)
Definition: NaryReassociate.cpp:328
DerivedTypes.h
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
CreateAdd
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:231
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:378
llvm::scUMinExpr
@ scUMinExpr
Definition: ScalarEvolutionExpressions.h:51
Value.h
InitializePasses.h
llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
Definition: Local.cpp:532
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:443
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
reassociate
nary reassociate
Definition: NaryReassociate.cpp:162
llvm::PatternMatch::m_Mul
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1055
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::Twine::concat
Twine concat(const Twine &Suffix) const
Definition: Twine.h:510