LLVM  9.0.0svn
LICM.cpp
Go to the documentation of this file.
1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible. It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe. This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // This pass uses alias analysis for two purposes:
16 //
17 // 1. Moving loop invariant loads and calls out of loops. If we can determine
18 // that a load or call inside of a loop never aliases anything stored to,
19 // we can hoist it or sink it like any other instruction.
20 // 2. Scalar Promotion of Memory - If there is a store instruction inside of
21 // the loop, we try to move the store to happen AFTER the loop instead of
22 // inside of the loop. This can only happen if a few conditions are true:
23 // A. The pointer stored through is loop invariant
24 // B. There are no stores or loads in the loop which _may_ alias the
25 // pointer. There are no calls in the loop which mod/ref the pointer.
26 // If these conditions are true, we can promote the loads and stores in the
27 // loop of the pointer to use a temporary alloca'd variable. We then use
28 // the SSAUpdater to construct the appropriate SSA form for the value.
29 //
30 //===----------------------------------------------------------------------===//
31 
33 #include "llvm/ADT/SetOperations.h"
34 #include "llvm/ADT/Statistic.h"
42 #include "llvm/Analysis/Loads.h"
43 #include "llvm/Analysis/LoopInfo.h"
45 #include "llvm/Analysis/LoopPass.h"
54 #include "llvm/IR/CFG.h"
55 #include "llvm/IR/Constants.h"
56 #include "llvm/IR/DataLayout.h"
57 #include "llvm/IR/DerivedTypes.h"
58 #include "llvm/IR/Dominators.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/LLVMContext.h"
62 #include "llvm/IR/Metadata.h"
63 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/Support/Debug.h"
68 #include "llvm/Transforms/Scalar.h"
74 #include <algorithm>
75 #include <utility>
76 using namespace llvm;
77 
78 #define DEBUG_TYPE "licm"
79 
80 STATISTIC(NumCreatedBlocks, "Number of blocks created");
81 STATISTIC(NumClonedBranches, "Number of branches cloned");
82 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
83 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
84 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
85 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
86 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
87 
88 /// Memory promotion is enabled by default.
89 static cl::opt<bool>
90  DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
91  cl::desc("Disable memory promotion in LICM pass"));
92 
94  "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
95  cl::desc("Enable control flow (and PHI) hoisting in LICM"));
96 
98  "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
99  cl::desc("Max num uses visited for identifying load "
100  "invariance in loop using invariant start (default = 8)"));
101 
102 // Default value of zero implies we use the regular alias set tracker mechanism
103 // instead of the cross product using AA to identify aliasing of the memory
104 // location we are interested in.
105 static cl::opt<int>
106 LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
107  cl::desc("How many instruction to cross product using AA"));
108 
109 // Experimental option to allow imprecision in LICM in pathological cases, in
110 // exchange for faster compile. This is to be removed if MemorySSA starts to
111 // address the same issue. This flag applies only when LICM uses MemorySSA
112 // instead on AliasSetTracker. LICM calls MemorySSAWalker's
113 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
114 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
115 // which may not be precise, since optimizeUses is capped. The result is
116 // correct, but we may not get as "far up" as possible to get which access is
117 // clobbering the one queried.
119  "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
120  cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
121  "for faster compile. Caps the MemorySSA clobbering calls."));
122 
123 // Experimentally, memory promotion carries less importance than sinking and
124 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
125 // compile time.
127  "max-acc-licm-promotion", cl::init(250), cl::Hidden,
128  cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
129  "effect. When MSSA in LICM is enabled, then this is the maximum "
130  "number of accesses allowed to be present in a loop in order to "
131  "enable memory promotion."));
132 
133 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
134 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
135  const LoopSafetyInfo *SafetyInfo,
136  TargetTransformInfo *TTI, bool &FreeInLoop);
137 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
138  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
140 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
141  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
143  bool FreeInLoop);
145  const DominatorTree *DT,
146  const Loop *CurLoop,
147  const LoopSafetyInfo *SafetyInfo,
149  const Instruction *CtxI = nullptr);
150 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
151  AliasSetTracker *CurAST, Loop *CurLoop,
152  AliasAnalysis *AA);
154  Loop *CurLoop,
155  int &LicmMssaOptCounter);
157  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
158  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
159 
160 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
161  AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
162 
163 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
164  ICFLoopSafetyInfo &SafetyInfo,
165  MemorySSAUpdater *MSSAU);
166 
167 namespace {
// Driver object shared by the legacy and new pass manager wrappers. It owns
// the per-loop AliasSetTracker map so that, when running bottom-up over a
// loop nest, alias information computed for an inner loop can be reused by
// the parent loop (legacy PM only; the new PM passes DeleteAST=true).
// NOTE(review): this file is a doxygen scrape — source lines 169 (presumably
// the ASTrackerMapTy alias declaration) and 171 (the TLI/TTI parameters of
// runOnLoop) are missing here; confirm against the real LICM.cpp.
168 struct LoopInvariantCodeMotion {
 // Run LICM on loop L; returns true if the IR was changed. DeleteAST selects
 // whether the alias set info is discarded afterwards (true) or cached in
 // LoopToAliasSetMap for the enclosing loop (false).
170  bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
172  ScalarEvolution *SE, MemorySSA *MSSA,
173  OptimizationRemarkEmitter *ORE, bool DeleteAST);
174 
 // Accessor used by the legacy pass to clear/inspect cached alias sets.
175  ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
176 
177 private:
178  ASTrackerMapTy LoopToAliasSetMap;
179 
 // Build an AliasSetTracker for L when MemorySSA is not available.
180  std::unique_ptr<AliasSetTracker>
181  collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
 // Build an AliasSetTracker for L from MemorySSA (used only for promotion).
182  std::unique_ptr<AliasSetTracker>
183  collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA,
184  MemorySSAUpdater *MSSAU);
185 };
186 
// Legacy (old pass manager) wrapper around LoopInvariantCodeMotion. Fetches
// the required analyses from the pass manager and forwards to LICM.runOnLoop
// with DeleteAST=false so alias sets can be reused by outer loops.
187 struct LegacyLICMPass : public LoopPass {
188  static char ID; // Pass identification, replacement for typeid
189  LegacyLICMPass() : LoopPass(ID) {
 // NOTE(review): scrape gap — source line 190 (presumably the
 // initializeLegacyLICMPassPass(...) registration call) is missing here.
191  }
192 
193  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
194  if (skipLoop(L)) {
195  // If we have run LICM on a previous loop but now we are skipping
196  // (because we've hit the opt-bisect limit), we need to clear the
197  // loop alias information.
198  LICM.getLoopToAliasSetMap().clear();
199  return false;
200  }
201 
 // ScalarEvolution is optional; only used to forget loop dispositions.
202  auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
 // NOTE(review): scrape gap — source line 203 (the start of the MSSA
 // conditional expression) is missing; the next two lines are its arms.
204  ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
205  : nullptr;
206  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
207  // pass. Function analyses need to be preserved across loop transformations
208  // but ORE cannot be preserved (see comment before the pass definition).
 // NOTE(review): scrape gap — source line 209 (the local ORE construction)
 // is missing; &ORE below refers to it.
210  return LICM.runOnLoop(L,
211  &getAnalysis<AAResultsWrapperPass>().getAAResults(),
212  &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
213  &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
214  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
215  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
216  *L->getHeader()->getParent()),
217  SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
218  }
219 
220  /// This transformation requires natural loop information & requires that
221  /// loop preheaders be inserted into the CFG...
222  ///
223  void getAnalysisUsage(AnalysisUsage &AU) const override {
 // NOTE(review): scrape gap — source lines 224-229 (the AU.addRequired /
 // addPreserved declarations) are missing from this view.
230  }
233  }
234 
236 
 // Sanity check: the legacy PM must have handed every cached alias set back
 // to a parent loop (or deleted it) before the pass is torn down.
237  bool doFinalization() override {
238  assert(LICM.getLoopToAliasSetMap().empty() &&
239  "Didn't free loop alias sets");
240  return false;
241  }
242 
243 private:
244  LoopInvariantCodeMotion LICM;
245 
246  /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
247  void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
248  Loop *L) override;
249 
250  /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
251  /// set.
252  void deleteAnalysisValue(Value *V, Loop *L) override;
253 
254  /// Simple Analysis hook. Delete loop L from alias set map.
255  void deleteAnalysisLoop(Loop *L) override;
256 };
257 } // namespace
258 
// NOTE(review): this is the body of the new pass manager entry point
// (PreservedAnalyses LICMPass::run(Loop &, LoopAnalysisManager &, ...)); the
// enclosing signature on source lines 259-260 was lost by the doxygen scrape.
261  const auto &FAM =
262  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
263  Function *F = L.getHeader()->getParent();
264 
 // ORE is a function-level analysis, so it can only be consumed here if an
 // outer pass already cached it; a loop pass cannot compute it itself.
265  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
266  // FIXME: This should probably be optional rather than required.
267  if (!ORE)
268  report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
269  "cached at a higher level");
270 
 // DeleteAST=true: the new PM never caches alias sets across loops.
271  LoopInvariantCodeMotion LICM;
272  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
273  AR.MSSA, ORE, true))
274  return PreservedAnalyses::all();
275 
276  auto PA = getLoopPassPreservedAnalyses();
277 
278  PA.preserve<DominatorTreeAnalysis>();
279  PA.preserve<LoopAnalysis>();
280 
281  return PA;
282 }
283 
284 char LegacyLICMPass::ID = 0;
// Legacy pass registration. NOTE(review): the INITIALIZE_PASS_DEPENDENCY(...)
// lines that normally sit between BEGIN and END (source lines 287-290) were
// dropped by the doxygen scrape.
285 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
286  false, false)
291 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
292  false)
293 
294 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
295 
296 /// Hoist expressions out of the specified loop. Note, alias info for inner
297 /// loop is not preserved so it is not a good idea to run LICM multiple
298 /// times on one loop.
299 /// We should delete AST for inner loops in the new pass manager to avoid
300 /// memory leak.
301 ///
302 bool LoopInvariantCodeMotion::runOnLoop(
303  Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
 // NOTE(review): scrape gap — source line 304 (presumably the TLI/TTI
 // parameters) is missing from this signature view.
305  MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
306  bool Changed = false;
307 
308  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
309 
 // Exactly one of CurAST / MSSAU is populated, depending on whether the
 // caller provided MemorySSA; downstream helpers assert this XOR invariant.
310  std::unique_ptr<AliasSetTracker> CurAST;
311  std::unique_ptr<MemorySSAUpdater> MSSAU;
312  bool NoOfMemAccTooLarge = false;
313  int LicmMssaOptCounter = 0;
314 
315  if (!MSSA) {
316  LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
317  CurAST = collectAliasInfoForLoop(L, LI, AA);
318  } else {
319  LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
320  MSSAU = make_unique<MemorySSAUpdater>(MSSA);
321 
 // Count memory accesses in the loop; if the count exceeds the
 // AccessCapForMSSAPromotion flag, memory promotion is disabled below to
 // bound compile time. The early breaks stop counting once over the cap.
322  unsigned AccessCapCount = 0;
323  for (auto *BB : L->getBlocks()) {
324  if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
325  for (const auto &MA : *Accesses) {
326  (void)MA;
327  AccessCapCount++;
328  if (AccessCapCount > AccessCapForMSSAPromotion) {
329  NoOfMemAccTooLarge = true;
330  break;
331  }
332  }
333  }
334  if (NoOfMemAccTooLarge)
335  break;
336  }
337  }
338 
339  // Get the preheader block to move instructions into...
340  BasicBlock *Preheader = L->getLoopPreheader();
341 
342  // Compute loop safety information.
343  ICFLoopSafetyInfo SafetyInfo(DT);
344  SafetyInfo.computeLoopSafetyInfo(L);
345 
346  // We want to visit all of the instructions in this loop... that are not parts
347  // of our subloops (they have already had their invariants hoisted out of
348  // their loop, into this loop, so there is no need to process the BODIES of
349  // the subloops).
350  //
351  // Traverse the body of the loop in depth first order on the dominator tree so
352  // that we are guaranteed to see definitions before we see uses. This allows
353  // us to sink instructions in one pass, without iteration. After sinking
354  // instructions, we perform another pass to hoist them out of the loop.
355  //
356  if (L->hasDedicatedExits())
357  Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
358  CurAST.get(), MSSAU.get(), &SafetyInfo,
359  NoOfMemAccTooLarge, LicmMssaOptCounter, ORE);
360  if (Preheader)
361  Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
362  CurAST.get(), MSSAU.get(), &SafetyInfo,
363  NoOfMemAccTooLarge, LicmMssaOptCounter, ORE);
364 
365  // Now that all loop invariants have been removed from the loop, promote any
366  // memory references to scalars that we can.
367  // Don't sink stores from loops without dedicated block exits. Exits
368  // containing indirect branches are not transformed by loop simplify,
369  // make sure we catch that. An additional load may be generated in the
370  // preheader for SSA updater, so also avoid sinking when no preheader
371  // is available.
372  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
373  !NoOfMemAccTooLarge) {
374  // Figure out the loop exits and their insertion points
375  SmallVector<BasicBlock *, 8> ExitBlocks;
376  L->getUniqueExitBlocks(ExitBlocks);
377 
378  // We can't insert into a catchswitch.
379  bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
380  return isa<CatchSwitchInst>(Exit->getTerminator());
381  });
382 
383  if (!HasCatchSwitch) {
 // NOTE(review): scrape gap — source line 384 (presumably the declaration
 // of InsertPts, used immediately below) is missing from this view.
385  SmallVector<MemoryAccess *, 8> MSSAInsertPts;
386  InsertPts.reserve(ExitBlocks.size());
387  if (MSSAU)
388  MSSAInsertPts.reserve(ExitBlocks.size());
389  for (BasicBlock *ExitBlock : ExitBlocks) {
390  InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
391  if (MSSAU)
392  MSSAInsertPts.push_back(nullptr);
393  }
394 
395  PredIteratorCache PIC;
396 
397  bool Promoted = false;
398 
399  // Build an AST using MSSA.
400  if (!CurAST.get())
401  CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
402 
403  // Loop over all of the alias sets in the tracker object.
404  for (AliasSet &AS : *CurAST) {
405  // We can promote this alias set if it has a store, if it is a "Must"
406  // alias set, if the pointer is loop invariant, and if we are not
407  // eliminating any volatile loads or stores.
408  if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
409  !L->isLoopInvariant(AS.begin()->getValue()))
410  continue;
411 
412  assert(
413  !AS.empty() &&
414  "Must alias set should have at least one pointer element in it!");
415 
416  SmallSetVector<Value *, 8> PointerMustAliases;
417  for (const auto &ASI : AS)
418  PointerMustAliases.insert(ASI.getValue());
419 
420  Promoted |= promoteLoopAccessesToScalars(
421  PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
422  DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
423  }
424 
425  // Once we have promoted values across the loop body we have to
426  // recursively reform LCSSA as any nested loop may now have values defined
427  // within the loop used in the outer loop.
428  // FIXME: This is really heavy handed. It would be a bit better to use an
429  // SSAUpdater strategy during promotion that was LCSSA aware and reformed
430  // it as it went.
431  if (Promoted)
432  formLCSSARecursively(*L, *DT, LI, SE);
433 
434  Changed |= Promoted;
435  }
436  }
437 
438  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
439  // specifically moving instructions across the loop boundary and so it is
440  // especially in need of sanity checking here.
441  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
442  assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
443  "Parent loop not left in LCSSA form after LICM!");
444 
445  // If this loop is nested inside of another one, save the alias information
446  // for when we process the outer loop.
447  if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST)
448  LoopToAliasSetMap[L] = std::move(CurAST);
449 
450  if (MSSAU.get() && VerifyMemorySSA)
451  MSSAU->getMemorySSA()->verifyMemorySSA();
452 
 // Moving code invalidates SCEV's cached loop dispositions.
453  if (Changed && SE)
454  SE->forgetLoopDispositions(L);
455  return Changed;
456 }
457 
458 /// Walk the specified region of the CFG (defined by all blocks dominated by
459 /// the specified block, and that are in the current loop) in reverse depth
460 /// first order w.r.t the DominatorTree. This allows us to visit uses before
461 /// definitions, allowing us to sink a loop body in one pass without iteration.
462 ///
 // NOTE(review): this is llvm::sinkRegion; the first line(s) of the signature
 // (source lines 463-464, carrying the DomTreeNode/AA/LI/DT/TLI parameters)
 // were lost by the doxygen scrape — confirm against the real LICM.cpp.
465  TargetTransformInfo *TTI, Loop *CurLoop,
466  AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
467  ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
468  int &LicmMssaOptCounter, OptimizationRemarkEmitter *ORE) {
469 
470  // Verify inputs.
471  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
472  CurLoop != nullptr && SafetyInfo != nullptr &&
473  "Unexpected input to sinkRegion.");
474  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
475  "Either AliasSetTracker or MemorySSA should be initialized.");
476 
477  // We want to visit children before parents. We will enque all the parents
478  // before their children in the worklist and process the worklist in reverse
479  // order.
 // NOTE(review): scrape gap — source line 480 (the construction of the
 // Worklist iterated below) is missing from this view.
481 
482  bool Changed = false;
483  for (DomTreeNode *DTN : reverse(Worklist)) {
484  BasicBlock *BB = DTN->getBlock();
485  // Only need to process the contents of this block if it is not part of a
486  // subloop (which would already have been processed).
487  if (inSubLoop(BB, CurLoop, LI))
488  continue;
489 
 // Iterate backwards so uses are visited before definitions, allowing a
 // whole chain of instructions to be sunk in one pass.
490  for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
491  Instruction &I = *--II;
492 
493  // If the instruction is dead, we would try to sink it because it isn't
494  // used in the loop, instead, just delete it.
495  if (isInstructionTriviallyDead(&I, TLI)) {
496  LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
497  salvageDebugInfo(I);
 // Advance the iterator past I before erasing it so it stays valid.
498  ++II;
499  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
500  Changed = true;
501  continue;
502  }
503 
504  // Check to see if we can sink this instruction to the exit blocks
505  // of the loop. We can do this if the all users of the instruction are
506  // outside of the loop. In this case, it doesn't even matter if the
507  // operands of the instruction are loop invariant.
508  //
509  bool FreeInLoop = false;
510  if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
511  canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
512  NoOfMemAccTooLarge, &LicmMssaOptCounter, ORE) &&
513  !I.mayHaveSideEffects()) {
514  if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
 // A "free" instruction is left in place (it also remains usable by
 // in-loop users); otherwise the original is erased after sinking.
515  if (!FreeInLoop) {
516  ++II;
517  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
518  }
519  Changed = true;
520  }
521  }
522  }
523  }
524  if (MSSAU && VerifyMemorySSA)
525  MSSAU->getMemorySSA()->verifyMemorySSA();
526  return Changed;
527 }
528 
529 namespace {
530 // This is a helper class for hoistRegion to make it able to hoist control flow
531 // in order to be able to hoist phis. The way this works is that we initially
532 // start hoisting to the loop preheader, and when we see a loop invariant branch
533 // we make note of this. When we then come to hoist an instruction that's
534 // conditional on such a branch we duplicate the branch and the relevant control
535 // flow, then hoist the instruction into the block corresponding to its original
536 // block in the duplicated control flow.
537 class ControlFlowHoister {
538 private:
539  // Information about the loop we are hoisting from
540  LoopInfo *LI;
541  DominatorTree *DT;
542  Loop *CurLoop;
543  MemorySSAUpdater *MSSAU;
544 
545  // A map of blocks in the loop to the block their instructions will be hoisted
546  // to.
547  DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
548 
549  // The branches that we can hoist, mapped to the block that marks a
550  // convergence point of their control flow.
551  DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
552 
553 public:
554  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
555  MemorySSAUpdater *MSSAU)
556  : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
557 
 // Record BI in HoistableBranches if it is a conditional, loop-invariant
 // branch whose two in-loop destinations reconverge at a block dominated by
 // BI. Such branches can later be duplicated outside the loop so that
 // instructions (and phis) conditional on them can be hoisted.
558  void registerPossiblyHoistableBranch(BranchInst *BI) {
559  // We can only hoist conditional branches with loop invariant operands.
560  if (!ControlFlowHoisting || !BI->isConditional() ||
561  !CurLoop->hasLoopInvariantOperands(BI))
562  return;
563 
564  // The branch destinations need to be in the loop, and we don't gain
565  // anything by duplicating conditional branches with duplicate successors,
566  // as it's essentially the same as an unconditional branch.
567  BasicBlock *TrueDest = BI->getSuccessor(0);
568  BasicBlock *FalseDest = BI->getSuccessor(1);
569  if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
570  TrueDest == FalseDest)
571  return;
572 
573  // We can hoist BI if one branch destination is the successor of the other,
574  // or both have common successor which we check by seeing if the
575  // intersection of their successors is non-empty.
576  // TODO: This could be expanded to allowing branches where both ends
577  // eventually converge to a single block.
578  SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
579  TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
580  FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
581  BasicBlock *CommonSucc = nullptr;
582  if (TrueDestSucc.count(FalseDest)) {
583  CommonSucc = FalseDest;
584  } else if (FalseDestSucc.count(TrueDest)) {
585  CommonSucc = TrueDest;
586  } else {
587  set_intersect(TrueDestSucc, FalseDestSucc);
588  // If there's one common successor use that.
589  if (TrueDestSucc.size() == 1)
590  CommonSucc = *TrueDestSucc.begin();
591  // If there's more than one pick whichever appears first in the block list
592  // (we can't use the value returned by TrueDestSucc.begin() as it's
593  // unpredicatable which element gets returned).
594  else if (!TrueDestSucc.empty()) {
595  Function *F = TrueDest->getParent();
596  auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
597  auto It = std::find_if(F->begin(), F->end(), IsSucc);
598  assert(It != F->end() && "Could not find successor in function");
599  CommonSucc = &*It;
600  }
601  }
602  // The common successor has to be dominated by the branch, as otherwise
603  // there will be some other path to the successor that will not be
604  // controlled by this branch so any phi we hoist would be controlled by the
605  // wrong condition. This also takes care of avoiding hoisting of loop back
606  // edges.
607  // TODO: In some cases this could be relaxed if the successor is dominated
608  // by another block that's been hoisted and we can guarantee that the
609  // control flow has been replicated exactly.
610  if (CommonSucc && DT->dominates(BI, CommonSucc))
611  HoistableBranches[BI] = CommonSucc;
612  }
613 
 // Return true if PN can be hoisted: all its operands are loop invariant and
 // every predecessor of its block is accounted for by a registered hoistable
 // branch, so the duplicated control flow can feed the hoisted phi.
614  bool canHoistPHI(PHINode *PN) {
615  // The phi must have loop invariant operands.
616  if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
617  return false;
618  // We can hoist phis if the block they are in is the target of hoistable
619  // branches which cover all of the predecessors of the block.
620  SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
621  BasicBlock *BB = PN->getParent();
622  for (BasicBlock *PredBB : predecessors(BB))
623  PredecessorBlocks.insert(PredBB);
624  // If we have less predecessor blocks than predecessors then the phi will
625  // have more than one incoming value for the same block which we can't
626  // handle.
627  // TODO: This could be handled be erasing some of the duplicate incoming
628  // values.
629  if (PredecessorBlocks.size() != pred_size(BB))
630  return false;
631  for (auto &Pair : HoistableBranches) {
632  if (Pair.second == BB) {
633  // Which blocks are predecessors via this branch depends on if the
634  // branch is triangle-like or diamond-like.
635  if (Pair.first->getSuccessor(0) == BB) {
636  PredecessorBlocks.erase(Pair.first->getParent());
637  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
638  } else if (Pair.first->getSuccessor(1) == BB) {
639  PredecessorBlocks.erase(Pair.first->getParent());
640  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
641  } else {
642  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
643  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
644  }
645  }
646  }
647  // PredecessorBlocks will now be empty if for every predecessor of BB we
648  // found a hoistable branch source.
649  return PredecessorBlocks.empty();
650  }
651 
 // Return (creating on demand) the block outside the loop into which
 // instructions from BB should be hoisted. With control flow hoisting
 // disabled this is always the preheader; otherwise it may be a clone of
 // BB's controlling branch structure built above the loop.
652  BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
653  if (!ControlFlowHoisting)
654  return CurLoop->getLoopPreheader();
655  // If BB has already been hoisted, return that
656  if (HoistDestinationMap.count(BB))
657  return HoistDestinationMap[BB];
658 
659  // Check if this block is conditional based on a pending branch
660  auto HasBBAsSuccessor =
 // NOTE(review): scrape gap — source line 661 (the lambda's parameter
 // list introducing Pair) is missing from this view.
662  return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
663  Pair.first->getSuccessor(1) == BB);
664  };
665  auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
666  HasBBAsSuccessor);
667 
668  // If not involved in a pending branch, hoist to preheader
669  BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
670  if (It == HoistableBranches.end()) {
671  LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
672  << " as hoist destination for " << BB->getName()
673  << "\n");
674  HoistDestinationMap[BB] = InitialPreheader;
675  return InitialPreheader;
676  }
677  BranchInst *BI = It->first;
678  assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
679  HoistableBranches.end() &&
680  "BB is expected to be the target of at most one branch");
681 
682  LLVMContext &C = BB->getContext();
683  BasicBlock *TrueDest = BI->getSuccessor(0);
684  BasicBlock *FalseDest = BI->getSuccessor(1);
685  BasicBlock *CommonSucc = HoistableBranches[BI];
 // Recursive call: the hoisted copy of BI's own block must exist first.
686  BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
687 
688  // Create hoisted versions of blocks that currently don't have them
689  auto CreateHoistedBlock = [&](BasicBlock *Orig) {
690  if (HoistDestinationMap.count(Orig))
691  return HoistDestinationMap[Orig];
692  BasicBlock *New =
693  BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
694  HoistDestinationMap[Orig] = New;
695  DT->addNewBlock(New, HoistTarget);
696  if (CurLoop->getParentLoop())
697  CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
698  ++NumCreatedBlocks;
699  LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
700  << " as hoist destination for " << Orig->getName()
701  << "\n");
702  return New;
703  };
704  BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
705  BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
706  BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
707 
708  // Link up these blocks with branches.
709  if (!HoistCommonSucc->getTerminator()) {
710  // The new common successor we've generated will branch to whatever that
711  // hoist target branched to.
712  BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
713  assert(TargetSucc && "Expected hoist target to have a single successor");
714  HoistCommonSucc->moveBefore(TargetSucc);
715  BranchInst::Create(TargetSucc, HoistCommonSucc);
716  }
717  if (!HoistTrueDest->getTerminator()) {
718  HoistTrueDest->moveBefore(HoistCommonSucc);
719  BranchInst::Create(HoistCommonSucc, HoistTrueDest);
720  }
721  if (!HoistFalseDest->getTerminator()) {
722  HoistFalseDest->moveBefore(HoistCommonSucc);
723  BranchInst::Create(HoistCommonSucc, HoistFalseDest);
724  }
725 
726  // If BI is being cloned to what was originally the preheader then
727  // HoistCommonSucc will now be the new preheader.
728  if (HoistTarget == InitialPreheader) {
729  // Phis in the loop header now need to use the new preheader.
730  InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
731  if (MSSAU)
 // NOTE(review): scrape gap — source line 732 (the MSSAU member call
 // taking the arguments on the next line) is missing from this view.
733  HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
734  // The new preheader dominates the loop header.
735  DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
736  DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
737  DT->changeImmediateDominator(HeaderNode, PreheaderNode);
738  // The preheader hoist destination is now the new preheader, with the
739  // exception of the hoist destination of this branch.
740  for (auto &Pair : HoistDestinationMap)
741  if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
742  Pair.second = HoistCommonSucc;
743  }
744 
745  // Now finally clone BI.
 // NOTE(review): scrape gap — source line 746 (the call, presumably
 // replacing HoistTarget's terminator, whose arguments follow) is missing.
747  HoistTarget->getTerminator(),
748  BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
749  ++NumClonedBranches;
750 
751  assert(CurLoop->getLoopPreheader() &&
752  "Hoisting blocks should not have destroyed preheader");
753  return HoistDestinationMap[BB];
754  }
755 };
756 } // namespace
757 
758 /// Walk the specified region of the CFG (defined by all blocks dominated by
759 /// the specified block, and that are in the current loop) in depth first
760 /// order w.r.t the DominatorTree. This allows us to visit definitions before
761 /// uses, allowing us to hoist a loop body in one pass without iteration.
762 ///
// NOTE(review): the first line of this function's signature (original line
// 763, presumably "bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA,
// LoopInfo *LI,") is missing from this copy -- restore from upstream LICM.cpp.
764  DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
765  AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
766  ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
767  int &LicmMssaOptCounter,
// NOTE(review): a trailing parameter line (original line 768) is missing here;
// the body uses "ORE", so it is presumably the OptimizationRemarkEmitter
// parameter -- confirm against upstream.
769  // Verify inputs.
770  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
771  CurLoop != nullptr && SafetyInfo != nullptr &&
772  "Unexpected input to hoistRegion.");
// Exactly one of the two aliasing analyses must be provided, never both.
773  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
774  "Either AliasSetTracker or MemorySSA should be initialized.");
775 
776  ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
777 
778  // Keep track of instructions that have been hoisted, as they may need to be
779  // re-hoisted if they end up not dominating all of their uses.
780  SmallVector<Instruction *, 16> HoistedInstructions;
781 
782  // For PHI hoisting to work we need to hoist blocks before their successors.
783  // We can do this by iterating through the blocks in the loop in reverse
784  // post-order.
785  LoopBlocksRPO Worklist(CurLoop);
786  Worklist.perform(LI);
787  bool Changed = false;
788  for (BasicBlock *BB : Worklist) {
789  // Only need to process the contents of this block if it is not part of a
790  // subloop (which would already have been processed).
791  if (inSubLoop(BB, CurLoop, LI))
792  continue;
793 
794  for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
795  Instruction &I = *II++;
796  // Try constant folding this instruction. If all the operands are
797  // constants, it is technically hoistable, but it would be better to
798  // just fold it.
// NOTE(review): the head of this if-statement (original line 799, the
// ConstantFoldInstruction call binding "C") is missing from this copy.
800  &I, I.getModule()->getDataLayout(), TLI)) {
801  LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
802  << '\n');
803  if (CurAST)
804  CurAST->copyValue(&I, C);
805  // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
// NOTE(review): original line 806 is missing here; presumably it replaces all
// uses of I with the folded constant C -- confirm against upstream.
807  if (isInstructionTriviallyDead(&I, TLI))
808  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
809  Changed = true;
810  continue;
811  }
812 
813  // Try hoisting the instruction out to the preheader. We can only do
814  // this if all of the operands of the instruction are loop invariant and
815  // if it is safe to hoist the instruction.
816  // TODO: It may be safe to hoist if we are hoisting to a conditional block
817  // and we have accurately duplicated the control flow from the loop header
818  // to that block.
819  if (CurLoop->hasLoopInvariantOperands(&I) &&
820  canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
821  NoOfMemAccTooLarge, &LicmMssaOptCounter, ORE) &&
// NOTE(review): original line 822 is missing; given the arguments that follow
// it is presumably a call to isSafeToExecuteUnconditionally( -- confirm
// against upstream.
823  I, DT, CurLoop, SafetyInfo, ORE,
824  CurLoop->getLoopPreheader()->getTerminator())) {
825  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
826  MSSAU, ORE);
827  HoistedInstructions.push_back(&I);
828  Changed = true;
829  continue;
830  }
831 
832  // Attempt to remove floating point division out of the loop by
833  // converting it to a reciprocal multiplication.
834  if (I.getOpcode() == Instruction::FDiv &&
835  CurLoop->isLoopInvariant(I.getOperand(1)) &&
836  I.hasAllowReciprocal()) {
837  auto Divisor = I.getOperand(1);
838  auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
839  auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
840  ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
841  SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
842  ReciprocalDivisor->insertBefore(&I);
843 
844  auto Product =
845  BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
846  Product->setFastMathFlags(I.getFastMathFlags());
847  SafetyInfo->insertInstructionTo(Product, I.getParent());
848  Product->insertAfter(&I);
849  I.replaceAllUsesWith(Product);
850  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
851 
// Only the loop-invariant reciprocal is hoisted; the multiply stays put.
852  hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
853  SafetyInfo, MSSAU, ORE);
854  HoistedInstructions.push_back(ReciprocalDivisor);
855  Changed = true;
856  continue;
857  }
858 
859  auto IsInvariantStart = [&](Instruction &I) {
860  using namespace PatternMatch;
861  return I.use_empty() &&
862  match(&I, m_Intrinsic<Intrinsic::invariant_start>());
863  };
864  auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
865  return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
866  SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
867  };
868  if ((IsInvariantStart(I) || isGuard(&I)) &&
869  CurLoop->hasLoopInvariantOperands(&I) &&
870  MustExecuteWithoutWritesBefore(I)) {
871  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
872  MSSAU, ORE);
873  HoistedInstructions.push_back(&I);
874  Changed = true;
875  continue;
876  }
877 
878  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
879  if (CFH.canHoistPHI(PN)) {
880  // Redirect incoming blocks first to ensure that we create hoisted
881  // versions of those blocks before we hoist the phi.
882  for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
883  PN->setIncomingBlock(
884  i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
885  hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
886  MSSAU, ORE);
887  assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
888  Changed = true;
889  continue;
890  }
891  }
892 
893  // Remember possibly hoistable branches so we can actually hoist them
894  // later if needed.
895  if (BranchInst *BI = dyn_cast<BranchInst>(&I))
896  CFH.registerPossiblyHoistableBranch(BI);
897  }
898  }
899 
900  // If we hoisted instructions to a conditional block they may not dominate
901  // their uses that weren't hoisted (such as phis where some operands are not
902  // loop invariant). If so make them unconditional by moving them to their
903  // immediate dominator. We iterate through the instructions in reverse order
904  // which ensures that when we rehoist an instruction we rehoist its operands,
905  // and also keep track of where in the block we are rehoisting to to make sure
906  // that we rehoist instructions before the instructions that use them.
907  Instruction *HoistPoint = nullptr;
908  if (ControlFlowHoisting) {
909  for (Instruction *I : reverse(HoistedInstructions)) {
910  if (!llvm::all_of(I->uses(),
911  [&](Use &U) { return DT->dominates(I, U); })) {
912  BasicBlock *Dominator =
913  DT->getNode(I->getParent())->getIDom()->getBlock();
914  if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
915  if (HoistPoint)
916  assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
917  "New hoist point expected to dominate old hoist point");
918  HoistPoint = Dominator->getTerminator();
919  }
920  LLVM_DEBUG(dbgs() << "LICM rehoisting to "
921  << HoistPoint->getParent()->getName()
922  << ": " << *I << "\n");
923  moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU);
924  HoistPoint = I;
925  Changed = true;
926  }
927  }
928  }
929  if (MSSAU && VerifyMemorySSA)
930  MSSAU->getMemorySSA()->verifyMemorySSA();
931 
932  // Now that we've finished hoisting make sure that LI and DT are still
933  // valid.
934 #ifndef NDEBUG
935  if (Changed) {
// NOTE(review): original line 936 is missing; presumably the DT verification
// call whose failure message follows -- confirm against upstream.
937  "Dominator tree verification failed");
938  LI->verify(*DT);
939  }
940 #endif
941 
942  return Changed;
943 }
944 
945 // Return true if LI is invariant within scope of the loop. LI is invariant if
946 // CurLoop is dominated by an invariant.start representing the same memory
947 // location and size as the memory location LI loads from, and also the
948 // invariant.start has no uses.
// NOTE(review): the first signature line (original line 949, presumably
// "static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,") is
// missing from this copy -- restore from upstream LICM.cpp.
950  Loop *CurLoop) {
951  Value *Addr = LI->getOperand(0);
952  const DataLayout &DL = LI->getModule()->getDataLayout();
953  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
954  cast<PointerType>(Addr->getType())->getElementType());
955 
956  // if the type is i8 addrspace(x)*, we know this is the type of
957  // llvm.invariant.start operand
958  auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
959  LI->getPointerAddressSpace());
960  unsigned BitcastsVisited = 0;
961  // Look through bitcasts until we reach the i8* type (this is invariant.start
962  // operand type).
963  while (Addr->getType() != PtrInt8Ty) {
964  auto *BC = dyn_cast<BitCastInst>(Addr);
965  // Avoid traversing high number of bitcast uses.
966  if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
967  return false;
968  Addr = BC->getOperand(0);
969  }
970 
971  unsigned UsesVisited = 0;
972  // Traverse all uses of the load operand value, to see if invariant.start is
973  // one of the uses, and whether it dominates the load instruction.
974  for (auto *U : Addr->users()) {
975  // Avoid traversing for Load operand with high number of users.
976  if (++UsesVisited > MaxNumUsesTraversed)
977  return false;
// NOTE(review): original line 978 is missing; it presumably dyn_casts U into
// the IntrinsicInst "II" tested below -- confirm against upstream.
979  // If there are escaping uses of invariant.start instruction, the load maybe
980  // non-invariant.
981  if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
982  !II->use_empty())
983  continue;
984  unsigned InvariantSizeInBits =
985  cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
986  // Confirm the invariant.start location size contains the load operand size
987  // in bits. Also, the invariant.start should dominate the load, and we
988  // should not hoist the load out of a loop that contains this dominating
989  // invariant.start.
990  if (LocSizeInBits <= InvariantSizeInBits &&
991  DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
992  return true;
993  }
994 
995  return false;
996 }
997 
998 namespace {
999 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1000 /// sink a given instruction out of a loop. Does not address legality
1001 /// concerns such as aliasing or speculation safety.
1002 bool isHoistableAndSinkableInst(Instruction &I) {
1003  // Only these instructions are hoistable/sinkable.
1004  return (isa<LoadInst>(I) || isa<StoreInst>(I) ||
1005  isa<CallInst>(I) || isa<FenceInst>(I) ||
1006  isa<BinaryOperator>(I) || isa<CastInst>(I) ||
1007  isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
1008  isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
1009  isa<ExtractElementInst>(I) || isa<ShuffleVectorInst>(I) ||
1010  isa<ExtractValueInst>(I) || isa<InsertValueInst>(I));
1011 }
1012 /// Return true if all of the alias sets within this AST are known not to
1013 /// contain a Mod, or if MSSA knows thare are no MemoryDefs in the loop.
1014 bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
1015  const Loop *L) {
1016  if (CurAST) {
1017  for (AliasSet &AS : *CurAST) {
1018  if (!AS.isForwardingAliasSet() && AS.isMod()) {
1019  return false;
1020  }
1021  }
1022  return true;
1023  } else { /*MSSAU*/
1024  for (auto *BB : L->getBlocks())
1025  if (MSSAU->getMemorySSA()->getBlockDefs(BB))
1026  return false;
1027  return true;
1028  }
1029 }
1030 
1031 /// Return true if I is the only Instruction with a MemoryAccess in L.
1032 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1033  const MemorySSAUpdater *MSSAU) {
1034  for (auto *BB : L->getBlocks())
1035  if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
1036  int NotAPhi = 0;
1037  for (const auto &Acc : *Accs) {
1038  if (isa<MemoryPhi>(&Acc))
1039  continue;
1040  const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1041  if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1042  return false;
1043  }
1044  }
1045  return true;
1046 }
1047 }
1048 
// NOTE(review): the first signature line (original line 1049, the
// "bool llvm::canSinkOrHoistInst(Instruction &I, ..." head) is missing from
// this copy -- restore from upstream LICM.cpp.
1050  Loop *CurLoop, AliasSetTracker *CurAST,
1051  MemorySSAUpdater *MSSAU,
1052  bool TargetExecutesOncePerLoop,
1053  bool NoOfMemAccTooLarge, int *LicmMssaOptCounter,
// NOTE(review): original line 1054 is missing; it presumably declares the
// final parameter (the "ORE" used below) and the opening brace.
1055  // If we don't understand the instruction, bail early.
1056  if (!isHoistableAndSinkableInst(I))
1057  return false;
1058 
1059  MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
1060  if (MSSA)
1061  assert(LicmMssaOptCounter != nullptr && "Counter cannot be null.");
1062 
1063  // Loads have extra constraints we have to verify before we can hoist them.
1064  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1065  if (!LI->isUnordered())
1066  return false; // Don't sink/hoist volatile or ordered atomic loads!
1067 
1068  // Loads from constant memory are always safe to move, even if they end up
1069  // in the same alias set as something that ends up being modified.
1070  if (AA->pointsToConstantMemory(LI->getOperand(0)))
1071  return true;
1072  if (LI->getMetadata(LLVMContext::MD_invariant_load))
1073  return true;
1074 
1075  if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1076  return false; // Don't risk duplicating unordered loads
1077 
1078  // This checks for an invariant.start dominating the load.
1079  if (isLoadInvariantInLoop(LI, DT, CurLoop))
1080  return true;
1081 
1082  bool Invalidated;
1083  if (CurAST)
1084  Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
1085  CurLoop, AA);
1086  else
1087  Invalidated = pointerInvalidatedByLoopWithMSSA(
1088  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop,
1089  *LicmMssaOptCounter);
1090  // Check loop-invariant address because this may also be a sinkable load
1091  // whose address is not necessarily loop-invariant.
1092  if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1093  ORE->emit([&]() {
1094  return OptimizationRemarkMissed(
1095  DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1096  << "failed to move load with loop-invariant address "
1097  "because the loop may invalidate its value";
1098  });
1099 
1100  return !Invalidated;
1101  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1102  // Don't sink or hoist dbg info; it's legal, but not useful.
1103  if (isa<DbgInfoIntrinsic>(I))
1104  return false;
1105 
1106  // Don't sink calls which can throw.
1107  if (CI->mayThrow())
1108  return false;
1109 
1110  using namespace PatternMatch;
1111  if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1112  // Assumes don't actually alias anything or throw
1113  return true;
1114 
1115  // Handle simple cases by querying alias analysis.
1116  FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
1117  if (Behavior == FMRB_DoesNotAccessMemory)
1118  return true;
1119  if (AliasAnalysis::onlyReadsMemory(Behavior)) {
1120  // A readonly argmemonly function only reads from memory pointed to by
1121  // it's arguments with arbitrary offsets. If we can prove there are no
1122  // writes to this memory in the loop, we can hoist or sink.
// NOTE(review): original line 1123 is missing; it presumably guards the
// per-argument loop below with an argmemonly-behavior check -- confirm
// against upstream.
1124  // TODO: expand to writeable arguments
1125  for (Value *Op : CI->arg_operands())
1126  if (Op->getType()->isPointerTy()) {
1127  bool Invalidated;
1128  if (CurAST)
1129  Invalidated = pointerInvalidatedByLoop(
// NOTE(review): original line 1130 (the MemoryLocation argument for the
// call above) is missing from this copy -- confirm against upstream.
1131  CurAST, CurLoop, AA);
1132  else
1133  Invalidated = pointerInvalidatedByLoopWithMSSA(
1134  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
1135  *LicmMssaOptCounter);
1136  if (Invalidated)
1137  return false;
1138  }
1139  return true;
1140  }
1141 
1142  // If this call only reads from memory and there are no writes to memory
1143  // in the loop, we can hoist or sink the call as appropriate.
1144  if (isReadOnly(CurAST, MSSAU, CurLoop))
1145  return true;
1146  }
1147 
1148  // FIXME: This should use mod/ref information to see if we can hoist or
1149  // sink the call.
1150 
1151  return false;
1152  } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1153  // Fences alias (most) everything to provide ordering. For the moment,
1154  // just give up if there are any other memory operations in the loop.
1155  if (CurAST) {
1156  auto Begin = CurAST->begin();
1157  assert(Begin != CurAST->end() && "must contain FI");
1158  if (std::next(Begin) != CurAST->end())
1159  // constant memory for instance, TODO: handle better
1160  return false;
1161  auto *UniqueI = Begin->getUniqueInstruction();
1162  if (!UniqueI)
1163  // other memory op, give up
1164  return false;
1165  (void)FI; // suppress unused variable warning
1166  assert(UniqueI == FI && "AS must contain FI");
1167  return true;
1168  } else // MSSAU
1169  return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1170  } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1171  if (!SI->isUnordered())
1172  return false; // Don't sink/hoist volatile or ordered atomic store!
1173 
1174  // We can only hoist a store that we can prove writes a value which is not
1175  // read or overwritten within the loop. For those cases, we fallback to
1176  // load store promotion instead. TODO: We can extend this to cases where
1177  // there is exactly one write to the location and that write dominates an
1178  // arbitrary number of reads in the loop.
1179  if (CurAST) {
1180  auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
1181 
1182  if (AS.isRef() || !AS.isMustAlias())
1183  // Quick exit test, handled by the full path below as well.
1184  return false;
1185  auto *UniqueI = AS.getUniqueInstruction();
1186  if (!UniqueI)
1187  // other memory op, give up
1188  return false;
1189  assert(UniqueI == SI && "AS must contain SI");
1190  return true;
1191  } else { // MSSAU
1192  if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1193  return true;
1194  if (*LicmMssaOptCounter < LicmMssaOptCap) {
// NOTE(review): original line 1195 is missing; it presumably obtains the
// clobbering MemoryAccess "Source" for SI that is tested below -- confirm
// against upstream.
1196  (*LicmMssaOptCounter)++;
1197  // If there are no clobbering Defs in the loop, we still need to check
1198  // for interfering Uses. If there are more accesses than the Promotion
1199  // cap, give up, we're not walking a list that long. Otherwise, walk the
1200  // list, check each Use if it's optimized to an access outside the loop.
1201  // If yes, store is safe to hoist. This is fairly restrictive, but
1202  // conservatively correct.
1203  // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1204  // moving accesses. Can also extend to dominating uses.
1205  if ((!MSSA->isLiveOnEntryDef(Source) &&
1206  CurLoop->contains(Source->getBlock())) ||
1207  NoOfMemAccTooLarge)
1208  return false;
1209  for (auto *BB : CurLoop->getBlocks())
1210  if (auto *Accesses = MSSA->getBlockAccesses(BB))
1211  for (const auto &MA : *Accesses)
1212  if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1213  auto *MD = MU->getDefiningAccess();
1214  if (!MSSA->isLiveOnEntryDef(MD) &&
1215  CurLoop->contains(MD->getBlock()))
1216  return false;
1217  }
1218  return true;
1219  }
1220  return false;
1221  }
1222  }
1223 
1224  assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1225 
1226  // We've established mechanical ability and aliasing, it's up to the caller
1227  // to check fault safety
1228  return true;
1229 }
1230 
1231 /// Returns true if a PHINode is a trivially replaceable with an
1232 /// Instruction.
1233 /// This is true when all incoming values are that instruction.
1234 /// This pattern occurs most often with LCSSA PHI nodes.
1235 ///
1236 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1237  for (const Value *IncValue : PN.incoming_values())
1238  if (IncValue != &I)
1239  return false;
1240 
1241  return true;
1242 }
1243 
1244 /// Return true if the instruction is free in the loop.
1245 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1246  const TargetTransformInfo *TTI) {
1247 
1248  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
// NOTE(review): original line 1249 is missing; it is the condition whose
// failure takes the early "return false" below (presumably a check on the
// GEP's operands) -- confirm against upstream.
1250  return false;
1251  // For a GEP, we cannot simply use getUserCost because currently it
1252  // optimistically assume that a GEP will fold into addressing mode
1253  // regardless of its users.
1254  const BasicBlock *BB = GEP->getParent();
1255  for (const User *U : GEP->users()) {
1256  const Instruction *UI = cast<Instruction>(U);
// An in-loop user makes the GEP non-free unless it is a load/store in the
// same block (where the GEP can fold into the addressing mode).
1257  if (CurLoop->contains(UI) &&
1258  (BB != UI->getParent() ||
1259  (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1260  return false;
1261  }
1262  return true;
1263  } else
1264  return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
1265 }
1266 
1267 /// Return true if the only users of this instruction are outside of
1268 /// the loop. If this is true, we can sink the instruction to the exit
1269 /// blocks of the loop.
1270 ///
1271 /// We also return true if the instruction could be folded away in lowering.
1272 /// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1273 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1274  const LoopSafetyInfo *SafetyInfo,
1275  TargetTransformInfo *TTI, bool &FreeInLoop) {
1276  const auto &BlockColors = SafetyInfo->getBlockColors();
1277  bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1278  for (const User *U : I.users()) {
1279  const Instruction *UI = cast<Instruction>(U);
1280  if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1281  const BasicBlock *BB = PN->getParent();
1282  // We cannot sink uses in catchswitches.
1283  if (isa<CatchSwitchInst>(BB->getTerminator()))
1284  return false;
1285 
1286  // We need to sink a callsite to a unique funclet. Avoid sinking if the
1287  // phi use is too muddled.
1288  if (isa<CallInst>(I))
1289  if (!BlockColors.empty() &&
1290  BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1291  return false;
1292  }
1293 
1294  if (CurLoop->contains(UI)) {
1295  if (IsFree) {
1296  FreeInLoop = true;
1297  continue;
1298  }
1299  return false;
1300  }
1301  }
1302  return true;
1303 }
1304 
// NOTE(review): the signature head (original line 1305, presumably
// "static Instruction *CloneInstructionInExitBlock(") is missing from this
// copy -- restore from upstream LICM.cpp.
1306  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1307  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
1308  Instruction *New;
1309  if (auto *CI = dyn_cast<CallInst>(&I)) {
1310  const auto &BlockColors = SafetyInfo->getBlockColors();
1311 
1312  // Sinking call-sites need to be handled differently from other
1313  // instructions. The cloned call-site needs a funclet bundle operand
1314  // appropriate for it's location in the CFG.
// NOTE(review): original line 1315 is missing; it presumably declares the
// "OpBundles" vector populated below -- confirm against upstream.
1316  for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1317  BundleIdx != BundleEnd; ++BundleIdx) {
1318  OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
// Skip any existing funclet bundle; the right one for the exit block is
// attached below.
1319  if (Bundle.getTagID() == LLVMContext::OB_funclet)
1320  continue;
1321 
1322  OpBundles.emplace_back(Bundle);
1323  }
1324 
1325  if (!BlockColors.empty()) {
1326  const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1327  assert(CV.size() == 1 && "non-unique color for exit block!");
1328  BasicBlock *BBColor = CV.front();
1329  Instruction *EHPad = BBColor->getFirstNonPHI();
1330  if (EHPad->isEHPad())
1331  OpBundles.emplace_back("funclet", EHPad);
1332  }
1333 
1334  New = CallInst::Create(CI, OpBundles);
1335  } else {
1336  New = I.clone();
1337  }
1338 
1339  ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
1340  if (!I.getName().empty())
1341  New->setName(I.getName() + ".le");
1342 
1343  MemoryAccess *OldMemAcc;
1344  if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) {
1345  // Create a new MemoryAccess and let MemorySSA set its defining access.
1346  MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1347  New, nullptr, New->getParent(), MemorySSA::Beginning);
1348  if (NewMemAcc) {
1349  if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1350  MSSAU->insertDef(MemDef, /*RenameUses=*/true);
1351  else {
1352  auto *MemUse = cast<MemoryUse>(NewMemAcc);
1353  MSSAU->insertUse(MemUse);
1354  }
1355  }
1356  }
1357 
1358  // Build LCSSA PHI nodes for any in-loop operands. Note that this is
1359  // particularly cheap because we can rip off the PHI node that we're
1360  // replacing for the number and blocks of the predecessors.
1361  // OPT: If this shows up in a profile, we can instead finish sinking all
1362  // invariant instructions, and then walk their operands to re-establish
1363  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1364  // sinking bottom-up.
1365  for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
1366  ++OI)
1367  if (Instruction *OInst = dyn_cast<Instruction>(*OI))
1368  if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
1369  if (!OLoop->contains(&PN)) {
1370  PHINode *OpPN =
1371  PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1372  OInst->getName() + ".lcssa", &ExitBlock.front());
1373  for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1374  OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1375  *OI = OpPN;
1376  }
1377  return New;
1378 }
1379 
// NOTE(review): the signature head (original line 1380, presumably
// "static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo
// &SafetyInfo,") is missing from this copy -- restore from upstream.
1381  AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
// Drop I from whichever aliasing analysis is active and from the safety
// info before physically deleting it.
1382  if (AST)
1383  AST->deleteValue(&I);
1384  if (MSSAU)
1385  MSSAU->removeMemoryAccess(&I);
1386  SafetyInfo.removeInstruction(&I);
1387  I.eraseFromParent();
1388 }
1389 
// NOTE(review): the signature head (original line 1390, presumably
// "static void moveInstructionBefore(Instruction &I, Instruction &Dest,") is
// missing from this copy -- restore from upstream.
1391  ICFLoopSafetyInfo &SafetyInfo,
1392  MemorySSAUpdater *MSSAU) {
// Keep the safety info consistent with the physical move of I.
1393  SafetyInfo.removeInstruction(&I);
1394  SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1395  I.moveBefore(&Dest);
// If MemorySSA is in use, relocate I's memory access to the end of the
// destination block as well.
1396  if (MSSAU)
1397  if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1398  MSSAU->getMemorySSA()->getMemoryAccess(&I)))
1399  MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::End);
1400 }
1401 
// NOTE(review): the signature head (original line 1402) is missing from this
// copy; presumably "static Instruction *sinkThroughTriviallyReplaceablePHI(".
1403  PHINode *TPN, Instruction *I, LoopInfo *LI,
// NOTE(review): original line 1404 is missing; it presumably declares the
// "SunkCopies" map parameter used below -- confirm against upstream.
1405  const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1406  MemorySSAUpdater *MSSAU) {
1407  assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1408  "Expect only trivially replaceable PHI");
1409  BasicBlock *ExitBlock = TPN->getParent();
1410  Instruction *New;
// Reuse an existing clone for this exit block if one was already created.
1411  auto It = SunkCopies.find(ExitBlock);
1412  if (It != SunkCopies.end())
1413  New = It->second;
1414  else
1415  New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(
1416  *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1417  return New;
1418 }
1419 
1420 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1421  BasicBlock *BB = PN->getParent();
1422  if (!BB->canSplitPredecessors())
1423  return false;
1424  // It's not impossible to split EHPad blocks, but if BlockColors already exist
1425  // it require updating BlockColors for all offspring blocks accordingly. By
1426  // skipping such corner case, we can make updating BlockColors after splitting
1427  // predecessor fairly simple.
1428  if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1429  return false;
1430  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
1431  BasicBlock *BBPred = *PI;
1432  if (isa<IndirectBrInst>(BBPred->getTerminator()))
1433  return false;
1434  }
1435  return true;
1436 }
1437 
// NOTE(review): the signature head (original line 1438) is missing from this
// copy; by its call site it is presumably
// "static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,".
1439  LoopInfo *LI, const Loop *CurLoop,
1440  LoopSafetyInfo *SafetyInfo,
1441  MemorySSAUpdater *MSSAU) {
1442 #ifndef NDEBUG
1443  SmallVector<BasicBlock *, 32> ExitBlocks;
1444  CurLoop->getUniqueExitBlocks(ExitBlocks);
1445  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1446  ExitBlocks.end());
1447 #endif
1448  BasicBlock *ExitBB = PN->getParent();
1449  assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1450 
1451  // Split predecessors of the loop exit to make instructions in the loop are
1452  // exposed to exit blocks through trivially replaceable PHIs while keeping the
1453  // loop in the canonical form where each predecessor of each exit block should
1454  // be contained within the loop. For example, this will convert the loop below
1455  // from
1456  //
1457  // LB1:
1458  // %v1 =
1459  // br %LE, %LB2
1460  // LB2:
1461  // %v2 =
1462  // br %LE, %LB1
1463  // LE:
1464  // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1465  //
1466  // to
1467  //
1468  // LB1:
1469  // %v1 =
1470  // br %LE.split, %LB2
1471  // LB2:
1472  // %v2 =
1473  // br %LE.split2, %LB1
1474  // LE.split:
1475  // %p1 = phi [%v1, %LB1] <-- trivially replaceable
1476  // br %LE
1477  // LE.split2:
1478  // %p2 = phi [%v2, %LB2] <-- trivially replaceable
1479  // br %LE
1480  // LE:
1481  // %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1482  //
1483  const auto &BlockColors = SafetyInfo->getBlockColors();
1484  SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1485  while (!PredBBs.empty()) {
1486  BasicBlock *PredBB = *PredBBs.begin();
1487  assert(CurLoop->contains(PredBB) &&
1488  "Expect all predecessors are in the loop");
1489  if (PN->getBasicBlockIndex(PredBB) >= 0) {
// NOTE(review): original line 1490 is missing; it presumably calls
// SplitBlockPredecessors(...) producing the "NewPred" block recolored below
// (its remaining arguments continue on the next line) -- confirm upstream.
1491  ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1492  // Since we do not allow splitting EH-block with BlockColors in
1493  // canSplitPredecessors(), we can simply assign predecessor's color to
1494  // the new block.
1495  if (!BlockColors.empty())
1496  // Grab a reference to the ColorVector to be inserted before getting the
1497  // reference to the vector we are copying because inserting the new
1498  // element in BlockColors might cause the map to be reallocated.
1499  SafetyInfo->copyColors(NewPred, PredBB);
1500  }
1501  PredBBs.remove(PredBB);
1502  }
1503 }
1504 
1505 /// When an instruction is found to only be used outside of the loop, this
1506 /// function moves it to the exit blocks and patches up SSA form as needed.
1507 /// This method is guaranteed to remove the original instruction from its
1508 /// position, and may either delete it or move it to outside of the loop.
1509 ///
1510 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1511  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
// NOTE(review): a parameter line (original line 1512) is missing here; the
// body uses "MSSAU" and "ORE", so it presumably declares the MemorySSAUpdater
// and OptimizationRemarkEmitter parameters -- confirm against upstream.
1513  bool FreeInLoop) {
1514  LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1515  ORE->emit([&]() {
1516  return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1517  << "sinking " << ore::NV("Inst", &I);
1518  });
1519  bool Changed = false;
1520  if (isa<LoadInst>(I))
1521  ++NumMovedLoads;
1522  else if (isa<CallInst>(I))
1523  ++NumMovedCalls;
1524  ++NumSunk;
1525 
1526  // Iterate over users to be ready for actual sinking. Replace users via
1527  // unrechable blocks with undef and make all user PHIs trivially replcable.
1528  SmallPtrSet<Instruction *, 8> VisitedUsers;
1529  for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1530  auto *User = cast<Instruction>(*UI);
1531  Use &U = UI.getUse();
// Advance before possibly mutating the use, to keep the iterator valid.
1532  ++UI;
1533 
1534  if (VisitedUsers.count(User) || CurLoop->contains(User))
1535  continue;
1536 
1537  if (!DT->isReachableFromEntry(User->getParent())) {
1538  U = UndefValue::get(I.getType());
1539  Changed = true;
1540  continue;
1541  }
1542 
1543  // The user must be a PHI node.
1544  PHINode *PN = cast<PHINode>(User);
1545 
1546  // Surprisingly, instructions can be used outside of loops without any
1547  // exits. This can only happen in PHI nodes if the incoming block is
1548  // unreachable.
1549  BasicBlock *BB = PN->getIncomingBlock(U);
1550  if (!DT->isReachableFromEntry(BB)) {
1551  U = UndefValue::get(I.getType());
1552  Changed = true;
1553  continue;
1554  }
1555 
1556  VisitedUsers.insert(PN);
1557  if (isTriviallyReplaceablePHI(*PN, I))
1558  continue;
1559 
1560  if (!canSplitPredecessors(PN, SafetyInfo))
1561  return Changed;
1562 
1563  // Split predecessors of the PHI so that we can make users trivially
1564  // replaceable.
1565  splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
1566 
1567  // Should rebuild the iterators, as they may be invalidated by
1568  // splitPredecessorsOfLoopExit().
1569  UI = I.user_begin();
1570  UE = I.user_end();
1571  }
1572 
1573  if (VisitedUsers.empty())
1574  return Changed;
1575 
1576 #ifndef NDEBUG
1577  SmallVector<BasicBlock *, 32> ExitBlocks;
1578  CurLoop->getUniqueExitBlocks(ExitBlocks);
1579  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1580  ExitBlocks.end());
1581 #endif
1582 
1583  // Clones of this instruction. Don't create more than one per exit block!
// NOTE(review): original line 1584 is missing; it presumably declares the
// per-exit-block map of sunk clones ("SunkCopies") -- confirm upstream.
1585 
1586  // If this instruction is only used outside of the loop, then all users are
1587  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1588  // the instruction.
// NOTE(review): original line 1589 is missing; it presumably snapshots I's
// users into the "Users" collection iterated below -- confirm upstream.
1590  for (auto *UI : Users) {
1591  auto *User = cast<Instruction>(UI);
1592 
1593  if (CurLoop->contains(User))
1594  continue;
1595 
1596  PHINode *PN = cast<PHINode>(User);
1597  assert(ExitBlockSet.count(PN->getParent()) &&
1598  "The LCSSA PHI is not in an exit block!");
1599  // The PHI must be trivially replaceable.
// NOTE(review): original line 1600 is missing; it presumably binds "New" from
// a call to sinkThroughTriviallyReplaceablePHI( whose arguments continue on
// the next line -- confirm upstream.
1601  PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1602  PN->replaceAllUsesWith(New);
1603  eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
1604  Changed = true;
1605  }
1606  return Changed;
1607 }
1608 
1609 /// When an instruction is found to only use loop invariant operands that
1610 /// is safe to hoist, this instruction is called to do the dirty work.
1611 ///
1612 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1613  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
// NOTE(review): the final parameter line (original line 1614) is missing;
// the body uses "MSSAU" and "ORE", so it presumably declares them -- confirm
// against upstream.
1615  LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
1616  << "\n");
1617  ORE->emit([&]() {
1618  return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1619  << ore::NV("Inst", &I);
1620  });
1621 
1622  // Metadata can be dependent on conditions we are hoisting above.
1623  // Conservatively strip all metadata on the instruction unless we were
1624  // guaranteed to execute I if we entered the loop, in which case the metadata
1625  // is valid in the loop preheader.
1626  if (I.hasMetadataOtherThanDebugLoc() &&
1627  // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1628  // time in isGuaranteedToExecute if we don't actually have anything to
1629  // drop. It is a compile time optimization, not required for correctness.
1630  !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
// NOTE(review): original line 1631 (the then-statement that actually strips
// the metadata) is missing from this copy -- confirm against upstream.
1632 
1633  if (isa<PHINode>(I))
1634  // Move the new node to the end of the phi list in the destination block.
1635  moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU);
1636  else
1637  // Move the new node to the destination block, before its terminator.
1638  moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU);
1639 
1640  // Do not retain debug locations when we are moving instructions to different
1641  // basic blocks, because we want to avoid jumpy line tables. Calls, however,
1642  // need to retain their debug locs because they may be inlined.
1643  // FIXME: How do we retain source locations without causing poor debugging
1644  // behavior?
1645  if (!isa<CallInst>(I))
1646  I.setDebugLoc(DebugLoc());
1647 
1648  if (isa<LoadInst>(I))
1649  ++NumMovedLoads;
1650  else if (isa<CallInst>(I))
1651  ++NumMovedCalls;
1652  ++NumHoisted;
1653 }
1654 
1655 /// Only sink or hoist an instruction if it is not a trapping instruction,
1656 /// or if the instruction is known not to trap when moved to the preheader.
1657 /// or if it is a trapping instruction and is guaranteed to execute.
// NOTE(review): the extraction dropped original line 1658 (the signature
// start -- per usage at call sites it takes an Instruction &Inst first) and
// line 1662 (likely the OptimizationRemarkEmitter *ORE parameter used below).
// Confirm against upstream LICM.cpp.
1659  const DominatorTree *DT,
1660  const Loop *CurLoop,
1661  const LoopSafetyInfo *SafetyInfo,
1663  const Instruction *CtxI) {
// Instructions that are safe to speculate at CtxI can always be moved.
1664  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
1665  return true;
1666 
1667  bool GuaranteedToExecute =
1668  SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1669 
1670  if (!GuaranteedToExecute) {
// Diagnose a common missed optimization: a load from a loop-invariant
// address that cannot be hoisted because it only executes conditionally.
1671  auto *LI = dyn_cast<LoadInst>(&Inst);
1672  if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1673  ORE->emit([&]() {
1674  return OptimizationRemarkMissed(
1675  DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1676  << "failed to hoist load with loop-invariant address "
1677  "because load is conditionally executed";
1678  });
1679  }
1680 
1681  return GuaranteedToExecute;
1682 }
1683 
1684 namespace {
// Helper for promoteLoopAccessesToScalars: drives LoadAndStorePromoter to
// rewrite in-loop accesses of a promoted location, inserting flush-back
// stores in the loop exit blocks and keeping the AliasSetTracker, MemorySSA
// and loop-safety info consistent.
1685 class LoopPromoter : public LoadAndStorePromoter {
1686  Value *SomePtr; // Designated pointer to store to.
1687  const SmallSetVector<Value *, 8> &PointerMustAliases;
1688  SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1689  SmallVectorImpl<Instruction *> &LoopInsertPts;
1690  SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1691  PredIteratorCache &PredCache;
1692  AliasSetTracker &AST;
1693  MemorySSAUpdater *MSSAU;
1694  LoopInfo &LI;
1695  DebugLoc DL;
1696  int Alignment;
1697  bool UnorderedAtomic;
1698  AAMDNodes AATags;
1699  ICFLoopSafetyInfo &SafetyInfo;
1700 
// If V is defined inside a loop that does not contain BB, wrap it in a fresh
// LCSSA PHI in BB (one incoming V per predecessor); otherwise return V as-is.
1701  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1702  if (Instruction *I = dyn_cast<Instruction>(V))
1703  if (Loop *L = LI.getLoopFor(I->getParent()))
1704  if (!L->contains(BB)) {
1705  // We need to create an LCSSA PHI node for the incoming value and
1706  // store that.
1707  PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1708  I->getName() + ".lcssa", &BB->front());
1709  for (BasicBlock *Pred : PredCache.get(BB))
1710  PN->addIncoming(I, Pred);
1711  return PN;
1712  }
1713  return V;
1714  }
1715 
1716 public:
// NOTE(review): the extraction dropped original lines 1719-1721 here -- the
// constructor parameters bound as LEB, LIP, MSSAIP and PIC in the init list
// below. Confirm against upstream LICM.cpp.
1717  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1718  const SmallSetVector<Value *, 8> &PMA,
1722  AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
1723  DebugLoc dl, int alignment, bool UnorderedAtomic,
1724  const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
1725  : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
1726  LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
1727  PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
1728  Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1729  SafetyInfo(SafetyInfo) {}
1730 
// An instruction participates in this promotion iff its pointer operand is
// in the must-alias set being promoted.
1731  bool isInstInList(Instruction *I,
1732  const SmallVectorImpl<Instruction *> &) const override {
1733  Value *Ptr;
1734  if (LoadInst *LI = dyn_cast<LoadInst>(I))
1735  Ptr = LI->getOperand(0);
1736  else
1737  Ptr = cast<StoreInst>(I)->getPointerOperand();
1738  return PointerMustAliases.count(Ptr);
1739  }
1740 
1741  void doExtraRewritesBeforeFinalDeletion() override {
1742  // Insert stores after in the loop exit blocks. Each exit block gets a
1743  // store of the live-out values that feed them. Since we've already told
1744  // the SSA updater about the defs in the loop and the preheader
1745  // definition, it is all set and we can start using it.
1746  for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1747  BasicBlock *ExitBlock = LoopExitBlocks[i];
1748  Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1749  LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1750  Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1751  Instruction *InsertPos = LoopInsertPts[i];
1752  StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1753  if (UnorderedAtomic)
// NOTE(review): original line 1754 (the statement guarded by the if above,
// presumably marking NewSI unordered-atomic) is missing from this
// extraction -- verify against upstream.
1755  NewSI->setAlignment(Alignment);
1756  NewSI->setDebugLoc(DL);
1757  if (AATags)
1758  NewSI->setAAMetadata(AATags);
1759 
// Mirror the new store into MemorySSA: at the start of the exit block the
// first time, after the previously inserted access on later promotions.
1760  if (MSSAU) {
1761  MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1762  MemoryAccess *NewMemAcc;
1763  if (!MSSAInsertPoint) {
1764  NewMemAcc = MSSAU->createMemoryAccessInBB(
1765  NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1766  } else {
1767  NewMemAcc =
1768  MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1769  }
1770  MSSAInsertPts[i] = NewMemAcc;
1771  MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1772  // FIXME: true for safety, false may still be correct.
1773  }
1774  }
1775  }
1776 
1777  void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
1778  // Update alias analysis.
1779  AST.copyValue(LI, V);
1780  }
// Keep safety info, the alias set tracker and MemorySSA consistent when the
// promoter deletes a rewritten instruction.
1781  void instructionDeleted(Instruction *I) const override {
1782  SafetyInfo.removeInstruction(I);
1783  AST.deleteValue(I);
1784  if (MSSAU)
1785  MSSAU->removeMemoryAccess(I);
1786  }
1787 };
1788 
1789 
1790 /// Return true iff we can prove that a caller of this function can not inspect
1791 /// the contents of the provided object in a well defined program.
1792 bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
1793  if (isa<AllocaInst>(Object))
1794  // Since the alloca goes out of scope, we know the caller can't retain a
1795  // reference to it and be well defined. Thus, we don't need to check for
1796  // capture.
1797  return true;
1798 
1799  // For all other objects we need to know that the caller can't possibly
1800  // have gotten a reference to the object. There are two components of
1801  // that:
1802  // 1) Object can't be escaped by this function. This is what
1803  // PointerMayBeCaptured checks.
1804  // 2) Object can't have been captured at definition site. For this, we
1805  // need to know the return value is noalias. At the moment, we use a
1806  // weaker condition and handle only AllocLikeFunctions (which are
1807  // known to be noalias). TODO
1808  return isAllocLikeFn(Object, TLI) &&
1809  !PointerMayBeCaptured(Object, true, true);
1810 }
1811 
1812 } // namespace
1813 
1814 /// Try to promote memory values to scalars by sinking stores out of the
1815 /// loop and moving loads to before the loop. We do this by looping over
1816 /// the stores in the loop, looking for stores to Must pointers which are
1817 /// loop invariant.
1818 ///
// NOTE(review): the extraction dropped original lines 1819 (the signature
// start; the member index in this dump records the full prototype beginning
// bool promoteLoopAccessesToScalars(const SmallSetVector<Value *, 8> &, ...))
// and 1823 (further parameters -- likely the MSSAInsertPts vector and the
// PredIteratorCache PIC referenced near the end of the function). Confirm
// against upstream LICM.cpp before editing.
1820  const SmallSetVector<Value *, 8> &PointerMustAliases,
1821  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1822  SmallVectorImpl<Instruction *> &InsertPts,
1824  LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1825  Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
1826  ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
1827  // Verify inputs.
1828  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1829  CurAST != nullptr && SafetyInfo != nullptr &&
1830  "Unexpected Input to promoteLoopAccessesToScalars");
1831 
// Every pointer in the set must-aliases the others, so any representative
// will do for the checks below.
1832  Value *SomePtr = *PointerMustAliases.begin();
1833  BasicBlock *Preheader = CurLoop->getLoopPreheader();
1834 
1835  // It is not safe to promote a load/store from the loop if the load/store is
1836  // conditional. For example, turning:
1837  //
1838  // for () { if (c) *P += 1; }
1839  //
1840  // into:
1841  //
1842  // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
1843  //
1844  // is not safe, because *P may only be valid to access if 'c' is true.
1845  //
1846  // The safety property divides into two parts:
1847  // p1) The memory may not be dereferenceable on entry to the loop. In this
1848  // case, we can't insert the required load in the preheader.
1849  // p2) The memory model does not allow us to insert a store along any dynamic
1850  // path which did not originally have one.
1851  //
1852  // If at least one store is guaranteed to execute, both properties are
1853  // satisfied, and promotion is legal.
1854  //
1855  // This, however, is not a necessary condition. Even if no store/load is
1856  // guaranteed to execute, we can still establish these properties.
1857  // We can establish (p1) by proving that hoisting the load into the preheader
1858  // is safe (i.e. proving dereferenceability on all paths through the loop). We
1859  // can use any access within the alias set to prove dereferenceability,
1860  // since they're all must alias.
1861  //
1862  // There are two ways establish (p2):
1863  // a) Prove the location is thread-local. In this case the memory model
1864  // requirement does not apply, and stores are safe to insert.
1865  // b) Prove a store dominates every exit block. In this case, if an exit
1866  // blocks is reached, the original dynamic path would have taken us through
1867  // the store, so inserting a store into the exit block is safe. Note that this
1868  // is different from the store being guaranteed to execute. For instance,
1869  // if an exception is thrown on the first iteration of the loop, the original
1870  // store is never executed, but the exit blocks are not executed either.
1871 
1872  bool DereferenceableInPH = false;
1873  bool SafeToInsertStore = false;
1874 
// NOTE(review): original line 1875 is missing from this extraction; it is
// presumably the declaration of LoopUses (the instruction list populated and
// consumed below) -- verify against upstream.
1876 
1877  // We start with an alignment of one and try to find instructions that allow
1878  // us to prove better alignment.
1879  unsigned Alignment = 1;
1880  // Keep track of which types of access we see
1881  bool SawUnorderedAtomic = false;
1882  bool SawNotAtomic = false;
1883  AAMDNodes AATags;
1884 
1885  const DataLayout &MDL = Preheader->getModule()->getDataLayout();
1886 
1887  bool IsKnownThreadLocalObject = false;
1888  if (SafetyInfo->anyBlockMayThrow()) {
1889  // If a loop can throw, we have to insert a store along each unwind edge.
1890  // That said, we can't actually make the unwind edge explicit. Therefore,
1891  // we have to prove that the store is dead along the unwind edge. We do
1892  // this by proving that the caller can't have a reference to the object
1893  // after return and thus can't possibly load from the object.
1894  Value *Object = GetUnderlyingObject(SomePtr, MDL);
1895  if (!isKnownNonEscaping(Object, TLI))
1896  return false;
1897  // Subtlety: Alloca's aren't visible to callers, but *are* potentially
1898  // visible to other threads if captured and used during their lifetimes.
1899  IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
1900  }
1901 
1902  // Check that all of the pointers in the alias set have the same type. We
1903  // cannot (yet) promote a memory location that is loaded and stored in
1904  // different sizes. While we are at it, collect alignment and AA info.
1905  for (Value *ASIV : PointerMustAliases) {
1906  // Check that all of the pointers in the alias set have the same type. We
1907  // cannot (yet) promote a memory location that is loaded and stored in
1908  // different sizes.
1909  if (SomePtr->getType() != ASIV->getType())
1910  return false;
1911 
1912  for (User *U : ASIV->users()) {
1913  // Ignore instructions that are outside the loop.
1914  Instruction *UI = dyn_cast<Instruction>(U);
1915  if (!UI || !CurLoop->contains(UI))
1916  continue;
1917 
1918  // If there is an non-load/store instruction in the loop, we can't promote
1919  // it.
1920  if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
1921  if (!Load->isUnordered())
1922  return false;
1923 
1924  SawUnorderedAtomic |= Load->isAtomic();
1925  SawNotAtomic |= !Load->isAtomic();
1926 
// An alignment of 0 means "use the ABI alignment of the loaded type".
1927  unsigned InstAlignment = Load->getAlignment();
1928  if (!InstAlignment)
1929  InstAlignment =
1930  MDL.getABITypeAlignment(Load->getType());
1931 
1932  // Note that proving a load safe to speculate requires proving
1933  // sufficient alignment at the target location. Proving it guaranteed
1934  // to execute does as well. Thus we can increase our guaranteed
1935  // alignment as well.
1936  if (!DereferenceableInPH || (InstAlignment > Alignment))
1937  if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
1938  ORE, Preheader->getTerminator())) {
1939  DereferenceableInPH = true;
1940  Alignment = std::max(Alignment, InstAlignment);
1941  }
1942  } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
1943  // Stores *of* the pointer are not interesting, only stores *to* the
1944  // pointer.
1945  if (UI->getOperand(1) != ASIV)
1946  continue;
1947  if (!Store->isUnordered())
1948  return false;
1949 
1950  SawUnorderedAtomic |= Store->isAtomic();
1951  SawNotAtomic |= !Store->isAtomic();
1952 
1953  // If the store is guaranteed to execute, both properties are satisfied.
1954  // We may want to check if a store is guaranteed to execute even if we
1955  // already know that promotion is safe, since it may have higher
1956  // alignment than any other guaranteed stores, in which case we can
1957  // raise the alignment on the promoted store.
1958  unsigned InstAlignment = Store->getAlignment();
1959  if (!InstAlignment)
1960  InstAlignment =
1961  MDL.getABITypeAlignment(Store->getValueOperand()->getType());
1962 
1963  if (!DereferenceableInPH || !SafeToInsertStore ||
1964  (InstAlignment > Alignment)) {
1965  if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
1966  DereferenceableInPH = true;
1967  SafeToInsertStore = true;
1968  Alignment = std::max(Alignment, InstAlignment);
1969  }
1970  }
1971 
1972  // If a store dominates all exit blocks, it is safe to sink.
1973  // As explained above, if an exit block was executed, a dominating
1974  // store must have been executed at least once, so we are not
1975  // introducing stores on paths that did not have them.
1976  // Note that this only looks at explicit exit blocks. If we ever
1977  // start sinking stores into unwind edges (see above), this will break.
1978  if (!SafeToInsertStore)
1979  SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
1980  return DT->dominates(Store->getParent(), Exit);
1981  });
1982 
1983  // If the store is not guaranteed to execute, we may still get
1984  // deref info through it.
1985  if (!DereferenceableInPH) {
1986  DereferenceableInPH = isDereferenceableAndAlignedPointer(
1987  Store->getPointerOperand(), Store->getAlignment(), MDL,
1988  Preheader->getTerminator(), DT);
1989  }
1990  } else
1991  return false; // Not a load or store.
1992 
1993  // Merge the AA tags.
1994  if (LoopUses.empty()) {
1995  // On the first load/store, just take its AA tags.
1996  UI->getAAMetadata(AATags);
1997  } else if (AATags) {
1998  UI->getAAMetadata(AATags, /* Merge = */ true);
1999  }
2000 
2001  LoopUses.push_back(UI);
2002  }
2003  }
2004 
2005  // If we found both an unordered atomic instruction and a non-atomic memory
2006  // access, bail. We can't blindly promote non-atomic to atomic since we
2007  // might not be able to lower the result. We can't downgrade since that
2008  // would violate memory model. Also, align 0 is an error for atomics.
2009  if (SawUnorderedAtomic && SawNotAtomic)
2010  return false;
2011 
2012  // If we're inserting an atomic load in the preheader, we must be able to
2013  // lower it. We're only guaranteed to be able to lower naturally aligned
2014  // atomics.
2015  auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
2016  if (SawUnorderedAtomic &&
2017  Alignment < MDL.getTypeStoreSize(SomePtrElemType))
2018  return false;
2019 
2020  // If we couldn't prove we can hoist the load, bail.
2021  if (!DereferenceableInPH)
2022  return false;
2023 
2024  // We know we can hoist the load, but don't have a guaranteed store.
2025  // Check whether the location is thread-local. If it is, then we can insert
2026  // stores along paths which originally didn't have them without violating the
2027  // memory model.
2028  if (!SafeToInsertStore) {
2029  if (IsKnownThreadLocalObject)
2030  SafeToInsertStore = true;
2031  else {
2032  Value *Object = GetUnderlyingObject(SomePtr, MDL);
2033  SafeToInsertStore =
2034  (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
2035  !PointerMayBeCaptured(Object, true, true);
2036  }
2037  }
2038 
2039  // If we've still failed to prove we can sink the store, give up.
2040  if (!SafeToInsertStore)
2041  return false;
2042 
2043  // Otherwise, this is safe to promote, lets do it!
2044  LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
2045  << '\n');
2046  ORE->emit([&]() {
2047  return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2048  LoopUses[0])
2049  << "Moving accesses to memory location out of the loop";
2050  });
2051  ++NumPromoted;
2052 
2053  // Grab a debug location for the inserted loads/stores; given that the
2054  // inserted loads/stores have little relation to the original loads/stores,
2055  // this code just arbitrarily picks a location from one, since any debug
2056  // location is better than none.
2057  DebugLoc DL = LoopUses[0]->getDebugLoc();
2058 
2059  // We use the SSAUpdater interface to insert phi nodes as required.
// NOTE(review): original line 2060 is missing from this extraction; it is
// presumably the declaration of NewPHIs (the PHI-node vector handed to the
// SSAUpdater below) -- verify against upstream.
2061  SSAUpdater SSA(&NewPHIs);
2062  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
2063  InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
2064  Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
2065 
2066  // Set up the preheader to have a definition of the value. It is the live-out
2067  // value from the preheader that uses in the loop will use.
2068  LoadInst *PreheaderLoad = new LoadInst(
2069  SomePtr->getType()->getPointerElementType(), SomePtr,
2070  SomePtr->getName() + ".promoted", Preheader->getTerminator());
2071  if (SawUnorderedAtomic)
2072  PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2073  PreheaderLoad->setAlignment(Alignment);
2074  PreheaderLoad->setDebugLoc(DL);
2075  if (AATags)
2076  PreheaderLoad->setAAMetadata(AATags);
2077  SSA.AddAvailableValue(Preheader, PreheaderLoad);
2078 
// Register the new preheader load with MemorySSA as a use at the end of the
// preheader block.
2079  MemoryAccess *PreheaderLoadMemoryAccess;
2080  if (MSSAU) {
2081  PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
2082  PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2083  MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2084  MSSAU->insertUse(NewMemUse);
2085  }
2086 
2087  // Rewrite all the loads in the loop and remember all the definitions from
2088  // stores in the loop.
2089  Promoter.run(LoopUses);
2090 
2091  if (MSSAU && VerifyMemorySSA)
2092  MSSAU->getMemorySSA()->verifyMemorySSA();
2093  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2094  if (PreheaderLoad->use_empty())
2095  eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
2096 
2097  return true;
2098 }
2099 
2100 /// Returns an owning pointer to an alias set which incorporates aliasing info
2101 /// from L and all subloops of L.
2102 /// FIXME: In new pass manager, there is no helper function to handle loop
2103 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
2104 /// from scratch for every loop. Hook up with the helper functions when
2105 /// available in the new pass manager to avoid redundant computation.
2106 std::unique_ptr<AliasSetTracker>
2107 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
2108  AliasAnalysis *AA) {
2109  std::unique_ptr<AliasSetTracker> CurAST;
2110  SmallVector<Loop *, 4> RecomputeLoops;
2111  for (Loop *InnerL : L->getSubLoops()) {
2112  auto MapI = LoopToAliasSetMap.find(InnerL);
2113  // If the AST for this inner loop is missing it may have been merged into
2114  // some other loop's AST and then that loop unrolled, and so we need to
2115  // recompute it.
2116  if (MapI == LoopToAliasSetMap.end()) {
2117  RecomputeLoops.push_back(InnerL);
2118  continue;
2119  }
2120  std::unique_ptr<AliasSetTracker> InnerAST = std::move(MapI->second);
2121 
2122  if (CurAST) {
2123  // What if InnerLoop was modified by other passes ?
2124  // Once we've incorporated the inner loop's AST into ours, we don't need
2125  // the subloop's anymore.
2126  CurAST->add(*InnerAST);
2127  } else {
2128  CurAST = std::move(InnerAST);
2129  }
2130  LoopToAliasSetMap.erase(MapI);
2131  }
2132  if (!CurAST)
2133  CurAST = make_unique<AliasSetTracker>(*AA);
2134 
2135  // Add everything from the sub loops that are no longer directly available.
2136  for (Loop *InnerL : RecomputeLoops)
2137  for (BasicBlock *BB : InnerL->blocks())
2138  CurAST->add(*BB);
2139 
2140  // And merge in this loop (without anything from inner loops).
2141  for (BasicBlock *BB : L->blocks())
2142  if (LI->getLoopFor(BB) == L)
2143  CurAST->add(*BB);
2144 
2145  return CurAST;
2146 }
2147 
2148 std::unique_ptr<AliasSetTracker>
2149 LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
2150  Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
2151  auto *MSSA = MSSAU->getMemorySSA();
2152  auto CurAST = make_unique<AliasSetTracker>(*AA, MSSA, L);
2153  CurAST->addAllInstructionsInLoopUsingMSSA();
2154  return CurAST;
2155 }
2156 
2157 /// Simple analysis hook. Clone alias set info.
2158 ///
2159 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
2160  Loop *L) {
2161  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2162  if (ASTIt == LICM.getLoopToAliasSetMap().end())
2163  return;
2164 
2165  ASTIt->second->copyValue(From, To);
2166 }
2167 
2168 /// Simple Analysis hook. Delete value V from alias set
2169 ///
2170 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
2171  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2172  if (ASTIt == LICM.getLoopToAliasSetMap().end())
2173  return;
2174 
2175  ASTIt->second->deleteValue(V);
2176 }
2177 
2178 /// Simple Analysis hook. Delete value L from alias set map.
2179 ///
2180 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
2181  if (!LICM.getLoopToAliasSetMap().count(L))
2182  return;
2183 
2184  LICM.getLoopToAliasSetMap().erase(L);
2185 }
2186 
// NOTE(review): the extraction dropped original line 2187; per the member
// index in this dump the full signature is
//   static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
// followed by the parameters below. Confirm against upstream LICM.cpp.
2188  AliasSetTracker *CurAST, Loop *CurLoop,
2189  AliasAnalysis *AA) {
2190  // First check to see if any of the basic blocks in CurLoop invalidate *V.
2191  bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
2192 
// With the default LICMN2Theshold of zero, the AST answer is final; the
// per-instruction scan below is a diagnostic-only refinement.
2193  if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
2194  return isInvalidatedAccordingToAST;
2195 
2196  // Check with a diagnostic analysis if we can refine the information above.
2197  // This is to identify the limitations of using the AST.
2198  // The alias set mechanism used by LICM has a major weakness in that it
2199  // combines all things which may alias into a single set *before* asking
2200  // modref questions. As a result, a single readonly call within a loop will
2201  // collapse all loads and stores into a single alias set and report
2202  // invalidation if the loop contains any store. For example, readonly calls
2203  // with deopt states have this form and create a general alias set with all
2204  // loads and stores. In order to get any LICM in loops containing possible
2205  // deopt states we need a more precise invalidation of checking the mod ref
2206  // info of each instruction within the loop and LI. This has a complexity of
2207  // O(N^2), so currently, it is used only as a diagnostic tool since the
2208  // default value of LICMN2Threshold is zero.
2209 
2210  // Don't look at nested loops.
2211  if (CurLoop->begin() != CurLoop->end())
2212  return true;
2213 
// Query mod/ref of every loop instruction against MemLoc, capped at
// LICMN2Theshold queries; conservatively report invalidation when the cap
// is hit or any instruction may modify the location.
2214  int N = 0;
2215  for (BasicBlock *BB : CurLoop->getBlocks())
2216  for (Instruction &I : *BB) {
2217  if (N >= LICMN2Theshold) {
2218  LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
2219  << *(MemLoc.Ptr) << "\n");
2220  return true;
2221  }
2222  N++;
2223  auto Res = AA->getModRefInfo(&I, MemLoc);
2224  if (isModSet(Res)) {
2225  LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
2226  << *(MemLoc.Ptr) << "\n");
2227  return true;
2228  }
2229  }
2230  LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
2231  return false;
2232 }
2233 
// NOTE(review): the extraction dropped original lines 2234 (the signature
// start -- the body uses MU and MSSA, so those are likely among the missing
// parameters) and 2237 (presumably the declaration of Source, a
// MemoryAccess *, assigned below). Confirm against upstream LICM.cpp.
2235  Loop *CurLoop,
2236  int &LicmMssaOptCounter) {
2238  // See declaration of LicmMssaOptCap for usage details.
// Past the cap, fall back to the cheap defining access instead of the
// (more precise, more expensive) clobber walk.
2239  if (LicmMssaOptCounter >= LicmMssaOptCap)
2240  Source = MU->getDefiningAccess();
2241  else {
2242  Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
2243  LicmMssaOptCounter++;
2244  }
// Invalidated iff the clobbering access is a real definition located inside
// the loop (i.e. not live-on-entry).
2245  return !MSSA->isLiveOnEntryDef(Source) &&
2246  CurLoop->contains(Source->getBlock());
2247 }
2248 
2249 /// Little predicate that returns true if the specified basic block is in
2250 /// a subloop of the current one, not the current one itself.
2251 ///
2252 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2253  assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2254  return LI->getLoopFor(BB) != CurLoop;
2255 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, AliasSetTracker *AST, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1380
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:67
bool canSplitPredecessors() const
Definition: BasicBlock.cpp:371
void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB)
Inform the safety info that we are planning to insert a new instruction Inst into the basic block BB...
Definition: MustExecute.cpp:85
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:645
void ReplaceInstWithInst(BasicBlock::InstListType &BIL, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool promoteLoopAccessesToScalars(const SmallSetVector< Value *, 8 > &, SmallVectorImpl< BasicBlock *> &, SmallVectorImpl< Instruction *> &, SmallVectorImpl< MemoryAccess *> &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *)
Try to promote memory values to scalars by sinking stores out of the loop and moving loads to before ...
Definition: LICM.cpp:1819
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I)
Returns true if a PHINode is a trivially replaceable with an Instruction.
Definition: LICM.cpp:1236
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:38
Diagnostic information for missed-optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * getPointerOperand(Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
DiagnosticInfoOptimizationBase::Argument NV
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:82
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:254
This is the interface for a simple mod/ref and alias analysis over globals.
const_iterator end() const
EltTy front() const
bool hasMetadataOtherThanDebugLoc() const
Return true if this instruction has metadata attached to it other than a debug location.
Definition: Instruction.h:228
void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs)
Drop all unknown metadata except for debug locations.
Definition: Metadata.cpp:1198
iterator end()
Definition: Function.h:657
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
static constexpr LocationSize unknown()
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
Definition: LoopInfoImpl.h:85
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:176
const AccessList * getBlockAccesses(const BasicBlock *BB) const
Return the list of MemoryAccess's for a given basic block.
Definition: MemorySSA.h:750
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value...
Definition: SSAUpdater.cpp:71
The main scalar evolution driver.
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, LoopInfo *LI, const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1438
This class represents a function call, abstracting a target machine's calling convention.
This file contains the declarations for metadata subclasses.
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:253
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:629
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, bool, int &, OptimizationRemarkEmitter *)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block...
Definition: LICM.cpp:463
bool salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1603
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:709
void insertUse(MemoryUse *Use)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1185
BasicBlock * getSuccessor(unsigned i) const
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI)
Return true if the instruction is free in the loop.
Definition: LICM.cpp:1245
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
An instruction for reading from memory.
Definition: Instructions.h:167
Hexagon Common GEP
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
iv Induction Variable Users
Definition: IVUsers.cpp:51
void reserve(size_type N)
Definition: SmallVector.h:369
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:299
op_iterator op_begin()
Definition: User.h:229
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:63
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:703
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AliasAnalysis *AA)
Definition: LICM.cpp:2187
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
Loop Invariant Code Motion
Definition: LICM.cpp:291
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1155
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:383
Represents read-only accesses to memory.
Definition: MemorySSA.h:316
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:133
ArrayRef< BasicBlock * > get(BasicBlock *BB)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:951
virtual void computeLoopSafetyInfo(const Loop *CurLoop)
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:71
This is the interface for a SCEV-based alias analysis.
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Type * getPointerElementType() const
Definition: Type.h:375
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:369
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:689
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:123
bool hasAllowReciprocal() const
Determine whether the allow-reciprocal flag is set.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
virtual bool doFinalization(Module &)
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
Definition: Pass.h:109
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
Definition: InstrTypes.h:942
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:758
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:944
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
virtual bool anyBlockMayThrow() const
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:67
BlockT * getHeader() const
Definition: LoopInfo.h:99
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
FunctionModRefBehavior getModRefBehavior(const CallBase *Call)
Return the behavior of the given call site.
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:266
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_END(LegacyLICMPass
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following: ...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:712
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:250
PredIteratorCache - This class is an extremely trivial cache for predecessor iterator queries...
FunctionModRefBehavior
Summary of how a function affects memory in the program.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:268
Instruction * getUniqueInstruction()
If this alias set is known to contain a single instruction and only a single unique instruction...
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
This class represents a no-op cast from one type to another.
Memory SSA
Definition: MemorySSA.cpp:64
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:320
static cl::opt< bool > DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), cl::desc("Disable memory promotion in LICM pass"))
Memory promotion is enabled by default.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
void copyColors(BasicBlock *New, BasicBlock *Old)
Copy colors of block Old into the block New.
Definition: MustExecute.cpp:29
iterator begin()
Definition: Function.h:655
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Value * getOperand(unsigned i) const
Definition: User.h:169
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
void forgetLoopDispositions(const Loop *L)
Called when the client has changed the disposition of values in this loop.
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:105
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal=false)
Checks whether the given location points to constant memory, or if OrLocal is true whether it points ...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:873
void getAAMetadata(AAMDNodes &N, bool Merge=false) const
Fills the AAMDNodes structure with AA metadata from this instruction.
NodeT * getBlock() const
static Instruction * sinkThroughTriviallyReplaceablePHI(PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap< BasicBlock *, Instruction *, 32 > &SunkCopies, const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1402
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
static cl::opt< int > LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), cl::desc("How many instruction to cross product using AA"))
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:189
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:216
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const =0
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:321
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1493
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:39
void setAAMetadata(const AAMDNodes &N)
Sets the metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1264
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
size_t size(BasicBlock *BB) const
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:280
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:575
Expected to fold away in lowering.
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, Loop *CurLoop, int &LicmMssaOptCounter)
Definition: LICM.cpp:2234
Diagnostic information for applied optimization remarks.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
Pass * createLICMPass()
Definition: LICM.cpp:294
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:231
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1192
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LICM.cpp:259
AliasSet & getAliasSetFor(const MemoryLocation &MemLoc)
Return the alias set which contains the specified memory location.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:99
static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo)
Definition: LICM.cpp:1420
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1213
bool isMod() const
void setAlignment(unsigned Align)
bool onlyReadsMemory(const CallBase *Call)
Checks if the specified call is known to only read from non-volatile memory (or not access memory at ...
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1424
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, Loop *CurLoop)
Definition: LICM.cpp:949
size_t size() const
Definition: SmallVector.h:52
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
This function does not perform any non-local loads or stores to memory.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:57
static cl::opt< unsigned > AccessCapForMSSAPromotion("max-acc-licm-promotion", cl::init(250), cl::Hidden, cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no " "effect. When MSSA in LICM is enabled, then this is the maximum " "number of accesses allowed to be present in a loop in order to " "enable memory promotion."))
static cl::opt< uint32_t > MaxNumUsesTraversed("licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)"))
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:50
const Value * Ptr
The address of the start of the location.
Representation for a specific memory location.
size_type size() const
Definition: SmallPtrSet.h:92
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:333
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:109
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only use loop invariant operands that is safe to hoist, this instruction is called to do the dirty work.
Definition: LICM.cpp:1612
Iterator for intrusive lists based on ilist_node.
void removeInstruction(const Instruction *Inst)
Inform safety info that we are planning to remove the instruction Inst from its block.
Definition: MustExecute.cpp:91
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop) const
Returns true if we could not execute a memory-modifying instruction before we enter BB under assumpti...
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
#define DEBUG_TYPE
Definition: LICM.cpp:78
BlockVerifier::State From
static cl::opt< bool > ControlFlowHoisting("licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM"))
void verifyMemorySSA() const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1773
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
iterator end()
Definition: BasicBlock.h:270
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1153
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater...
Definition: SSAUpdater.h:136
void copyValue(Value *From, Value *To)
This method should be used whenever a preexisting value in the program is copied or cloned...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Provides information about what library functions are available for the current target.
iterator begin() const
Definition: LoopInfo.h:141
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:749
void wireOldPredecessorsToNewImmediatePredecessor(BasicBlock *Old, BasicBlock *New, ArrayRef< BasicBlock *> Preds, bool IdenticalEdgesWereMerged=true)
A new empty BasicBlock (New) now branches directly to Old.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:643
BasicBlock * getBlock() const
Definition: MemorySSA.h:156
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isConditional() const
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop)
Return true if the only users of this instruction are outside of the loop.
Definition: LICM.cpp:1273
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:124
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:694
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE, bool FreeInLoop)
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition: LICM.cpp:1510
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental.guard intrinsic.
Definition: GuardUtils.cpp:17
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:55
Class that has the common methods + fields of memory uses/defs.
Definition: MemorySSA.h:244
A lightweight accessor for an operand bundle meant to be passed around by value.
Definition: InstrTypes.h:914
iterator_range< user_iterator > users()
Definition: Value.h:399
static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI=nullptr)
Only sink or hoist an instruction if it is not a trapping instruction, or if the instruction is known...
Definition: LICM.cpp:1658
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, bool, int &, OptimizationRemarkEmitter *)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block...
Definition: LICM.cpp:763
static cl::opt< int > LicmMssaOptCap("licm-mssa-optimization-cap", cl::init(100), cl::Hidden, cl::desc("Enable imprecision in LICM in pathological cases, in exchange " "for faster compile. Caps the MemorySSA clobbering calls."))
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:593
LoopT * getParentLoop() const
Definition: LoopInfo.h:100
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:130
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:378
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB)
Checks if functions with the specified behavior are known to read and write at most from objects poin...
iterator insert(iterator where, pointer New)
Definition: ilist.h:226
iterator begin() const
Definition: SmallPtrSet.h:396
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:148
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
unsigned pred_size(const BasicBlock *BB)
Get the number of predecessors of BB.
Definition: CFG.h:121
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
user_iterator_impl< User > user_iterator
Definition: Value.h:368
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc...
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:136
licm
Definition: LICM.cpp:291
MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
iterator end() const
Definition: LoopInfo.h:142
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:730
Captures loop safety information.
Definition: MustExecute.h:47
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:171
const_iterator begin() const
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:432
bool empty() const
Definition: LoopInfo.h:145
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:290
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1011
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI)
Little predicate that returns true if the specified basic block is in a subloop of the current one...
Definition: LICM.cpp:2252
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:375
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:349
LLVM Value Representation.
Definition: Value.h:72
void setAlignment(unsigned Align)
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:444
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, bool NoOfMemAccessesTooLarge, int *LicmMssaOptCap=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Returns true if is legal to hoist or sink this instruction disregarding the possible introduction of ...
Definition: LICM.cpp:1049
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
void initializeLegacyLICMPassPass(PassRegistry &)
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:586
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:969
void getUniqueExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:99
This is the interface for LLVM's primary stateless and local alias analysis.
A container for analyses that lazily runs them and caches their results.
static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1305
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested...
Definition: Loads.cpp:128
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
op_range incoming_values()
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:155
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:173
The optimization diagnostic interface.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:120
bool use_empty() const
Definition: Value.h:322
unsigned size() const
void deleteValue(Value *PtrVal)
This method is used to remove a pointer value from the AliasSetTracker entirely.
loop versioning Loop Versioning For LICM
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
const BasicBlock * getParent() const
Definition: Instruction.h:66
static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1390
cl::opt< bool > EnableMSSALoopDependency
Enables memory ssa as a dependency for loop passes.
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=DefaultMaxUsesToExplore)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
user_iterator user_end()
Definition: Value.h:383
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition: Instruction.h:535
const DenseMap< BasicBlock *, ColorVector > & getBlockColors() const
Returns block colors map that is used to update funclet operand bundles.
Definition: MustExecute.cpp:25