LLVM  10.0.0svn
LICM.cpp
Go to the documentation of this file.
1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible. It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe. This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // This pass uses alias analysis for two purposes:
16 //
17 // 1. Moving loop invariant loads and calls out of loops. If we can determine
18 // that a load or call inside of a loop never aliases anything stored to,
19 // we can hoist it or sink it like any other instruction.
20 // 2. Scalar Promotion of Memory - If there is a store instruction inside of
21 // the loop, we try to move the store to happen AFTER the loop instead of
22 // inside of the loop. This can only happen if a few conditions are true:
23 // A. The pointer stored through is loop invariant
24 // B. There are no stores or loads in the loop which _may_ alias the
25 // pointer. There are no calls in the loop which mod/ref the pointer.
26 // If these conditions are true, we can promote the loads and stores in the
27 // loop of the pointer to use a temporary alloca'd variable. We then use
28 // the SSAUpdater to construct the appropriate SSA form for the value.
29 //
30 //===----------------------------------------------------------------------===//
31 
33 #include "llvm/ADT/SetOperations.h"
34 #include "llvm/ADT/Statistic.h"
42 #include "llvm/Analysis/Loads.h"
43 #include "llvm/Analysis/LoopInfo.h"
45 #include "llvm/Analysis/LoopPass.h"
54 #include "llvm/IR/CFG.h"
55 #include "llvm/IR/Constants.h"
56 #include "llvm/IR/DataLayout.h"
58 #include "llvm/IR/DerivedTypes.h"
59 #include "llvm/IR/Dominators.h"
60 #include "llvm/IR/Instructions.h"
61 #include "llvm/IR/IntrinsicInst.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/IR/Metadata.h"
64 #include "llvm/IR/PatternMatch.h"
67 #include "llvm/Support/Debug.h"
69 #include "llvm/Transforms/Scalar.h"
75 #include <algorithm>
76 #include <utility>
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "licm"
80 
81 STATISTIC(NumCreatedBlocks, "Number of blocks created");
82 STATISTIC(NumClonedBranches, "Number of branches cloned");
83 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
84 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
85 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
86 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
87 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
88 
89 /// Memory promotion is enabled by default.
90 static cl::opt<bool>
91  DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
92  cl::desc("Disable memory promotion in LICM pass"));
93 
95  "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
96  cl::desc("Enable control flow (and PHI) hoisting in LICM"));
97 
99  "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
100  cl::desc("Max num uses visited for identifying load "
101  "invariance in loop using invariant start (default = 8)"));
102 
103 // Default value of zero implies we use the regular alias set tracker mechanism
104 // instead of the cross product using AA to identify aliasing of the memory
105 // location we are interested in.
106 static cl::opt<int>
107 LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
108  cl::desc("How many instruction to cross product using AA"));
109 
110 // Experimental option to allow imprecision in LICM in pathological cases, in
111 // exchange for faster compile. This is to be removed if MemorySSA starts to
112 // address the same issue. This flag applies only when LICM uses MemorySSA
113 // instead on AliasSetTracker. LICM calls MemorySSAWalker's
114 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
115 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
116 // which may not be precise, since optimizeUses is capped. The result is
117 // correct, but we may not get as "far up" as possible to get which access is
118 // clobbering the one queried.
120  "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
121  cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
122  "for faster compile. Caps the MemorySSA clobbering calls."));
123 
124 // Experimentally, memory promotion carries less importance than sinking and
125 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
126 // compile time.
128  "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
129  cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
130  "effect. When MSSA in LICM is enabled, then this is the maximum "
131  "number of accesses allowed to be present in a loop in order to "
132  "enable memory promotion."));
133 
134 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
135 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
136  const LoopSafetyInfo *SafetyInfo,
137  TargetTransformInfo *TTI, bool &FreeInLoop);
138 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
139  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
141 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
142  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
145  const DominatorTree *DT,
146  const Loop *CurLoop,
147  const LoopSafetyInfo *SafetyInfo,
149  const Instruction *CtxI = nullptr);
150 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
151  AliasSetTracker *CurAST, Loop *CurLoop,
152  AliasAnalysis *AA);
154  Loop *CurLoop,
155  SinkAndHoistLICMFlags &Flags);
157  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
158  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
159 
160 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
161  AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
162 
163 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
164  ICFLoopSafetyInfo &SafetyInfo,
165  MemorySSAUpdater *MSSAU);
166 
167 namespace {
168 struct LoopInvariantCodeMotion {
170  bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
172  ScalarEvolution *SE, MemorySSA *MSSA,
173  OptimizationRemarkEmitter *ORE, bool DeleteAST);
174 
175  ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
176  LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
177  unsigned LicmMssaNoAccForPromotionCap)
178  : LicmMssaOptCap(LicmMssaOptCap),
179  LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
180 
181 private:
182  ASTrackerMapTy LoopToAliasSetMap;
183  unsigned LicmMssaOptCap;
184  unsigned LicmMssaNoAccForPromotionCap;
185 
186  std::unique_ptr<AliasSetTracker>
187  collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
188  std::unique_ptr<AliasSetTracker>
189  collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA,
190  MemorySSAUpdater *MSSAU);
191 };
192 
193 struct LegacyLICMPass : public LoopPass {
194  static char ID; // Pass identification, replacement for typeid
195  LegacyLICMPass(
196  unsigned LicmMssaOptCap = SetLicmMssaOptCap,
197  unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
198  : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
200  }
201 
202  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
203  if (skipLoop(L)) {
204  // If we have run LICM on a previous loop but now we are skipping
205  // (because we've hit the opt-bisect limit), we need to clear the
206  // loop alias information.
207  LICM.getLoopToAliasSetMap().clear();
208  return false;
209  }
210 
211  auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
213  ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
214  : nullptr;
215  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
216  // pass. Function analyses need to be preserved across loop transformations
217  // but ORE cannot be preserved (see comment before the pass definition).
219  return LICM.runOnLoop(L,
220  &getAnalysis<AAResultsWrapperPass>().getAAResults(),
221  &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
222  &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
223  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
224  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
225  *L->getHeader()->getParent()),
226  SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
227  }
228 
229  /// This transformation requires natural loop information & requires that
230  /// loop preheaders be inserted into the CFG...
231  ///
232  void getAnalysisUsage(AnalysisUsage &AU) const override {
239  }
242  }
243 
245 
246  bool doFinalization() override {
247  auto &AliasSetMap = LICM.getLoopToAliasSetMap();
248  // All loops in the AliasSetMap should be cleaned up already. The only case
249  // where we fail to do so is if an outer loop gets deleted before LICM
250  // visits it.
251  assert(all_of(AliasSetMap,
253  return !KV.first->getParentLoop();
254  }) &&
255  "Didn't free loop alias sets");
256  AliasSetMap.clear();
257  return false;
258  }
259 
260 private:
261  LoopInvariantCodeMotion LICM;
262 
263  /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
264  void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
265  Loop *L) override;
266 
267  /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
268  /// set.
269  void deleteAnalysisValue(Value *V, Loop *L) override;
270 
271  /// Simple Analysis hook. Delete loop L from alias set map.
272  void deleteAnalysisLoop(Loop *L) override;
273 };
274 } // namespace
275 
278  const auto &FAM =
279  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
280  Function *F = L.getHeader()->getParent();
281 
282  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
283  // FIXME: This should probably be optional rather than required.
284  if (!ORE)
285  report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
286  "cached at a higher level");
287 
288  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
289  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
290  AR.MSSA, ORE, true))
291  return PreservedAnalyses::all();
292 
293  auto PA = getLoopPassPreservedAnalyses();
294 
295  PA.preserve<DominatorTreeAnalysis>();
296  PA.preserve<LoopAnalysis>();
298  PA.preserve<MemorySSAAnalysis>();
299 
300  return PA;
301 }
302 
303 char LegacyLICMPass::ID = 0;
304 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
305  false, false)
310 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
311  false)
312 
313 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
314 Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
315  unsigned LicmMssaNoAccForPromotionCap) {
316  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
317 }
318 
319 /// Hoist expressions out of the specified loop. Note, alias info for inner
320 /// loop is not preserved so it is not a good idea to run LICM multiple
321 /// times on one loop.
322 /// We should delete AST for inner loops in the new pass manager to avoid
323 /// memory leak.
324 ///
325 bool LoopInvariantCodeMotion::runOnLoop(
326  Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
328  MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
329  bool Changed = false;
330 
331  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
332 
333  std::unique_ptr<AliasSetTracker> CurAST;
334  std::unique_ptr<MemorySSAUpdater> MSSAU;
335  bool NoOfMemAccTooLarge = false;
336  unsigned LicmMssaOptCounter = 0;
337 
338  if (!MSSA) {
339  LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
340  CurAST = collectAliasInfoForLoop(L, LI, AA);
341  } else {
342  LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
343  MSSAU = make_unique<MemorySSAUpdater>(MSSA);
344 
345  unsigned AccessCapCount = 0;
346  for (auto *BB : L->getBlocks()) {
347  if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
348  for (const auto &MA : *Accesses) {
349  (void)MA;
350  AccessCapCount++;
351  if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
352  NoOfMemAccTooLarge = true;
353  break;
354  }
355  }
356  }
357  if (NoOfMemAccTooLarge)
358  break;
359  }
360  }
361 
362  // Get the preheader block to move instructions into...
363  BasicBlock *Preheader = L->getLoopPreheader();
364 
365  // Compute loop safety information.
366  ICFLoopSafetyInfo SafetyInfo(DT);
367  SafetyInfo.computeLoopSafetyInfo(L);
368 
369  // We want to visit all of the instructions in this loop... that are not parts
370  // of our subloops (they have already had their invariants hoisted out of
371  // their loop, into this loop, so there is no need to process the BODIES of
372  // the subloops).
373  //
374  // Traverse the body of the loop in depth first order on the dominator tree so
375  // that we are guaranteed to see definitions before we see uses. This allows
376  // us to sink instructions in one pass, without iteration. After sinking
377  // instructions, we perform another pass to hoist them out of the loop.
378  SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
379  LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
380  /*IsSink=*/true};
381  if (L->hasDedicatedExits())
382  Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
383  CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
384  Flags.IsSink = false;
385  if (Preheader)
386  Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
387  CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
388 
389  // Now that all loop invariants have been removed from the loop, promote any
390  // memory references to scalars that we can.
391  // Don't sink stores from loops without dedicated block exits. Exits
392  // containing indirect branches are not transformed by loop simplify,
393  // make sure we catch that. An additional load may be generated in the
394  // preheader for SSA updater, so also avoid sinking when no preheader
395  // is available.
396  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
397  !NoOfMemAccTooLarge) {
398  // Figure out the loop exits and their insertion points
399  SmallVector<BasicBlock *, 8> ExitBlocks;
400  L->getUniqueExitBlocks(ExitBlocks);
401 
402  // We can't insert into a catchswitch.
403  bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
404  return isa<CatchSwitchInst>(Exit->getTerminator());
405  });
406 
407  if (!HasCatchSwitch) {
409  SmallVector<MemoryAccess *, 8> MSSAInsertPts;
410  InsertPts.reserve(ExitBlocks.size());
411  if (MSSAU)
412  MSSAInsertPts.reserve(ExitBlocks.size());
413  for (BasicBlock *ExitBlock : ExitBlocks) {
414  InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
415  if (MSSAU)
416  MSSAInsertPts.push_back(nullptr);
417  }
418 
419  PredIteratorCache PIC;
420 
421  bool Promoted = false;
422 
423  // Build an AST using MSSA.
424  if (!CurAST.get())
425  CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
426 
427  // Loop over all of the alias sets in the tracker object.
428  for (AliasSet &AS : *CurAST) {
429  // We can promote this alias set if it has a store, if it is a "Must"
430  // alias set, if the pointer is loop invariant, and if we are not
431  // eliminating any volatile loads or stores.
432  if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
433  !L->isLoopInvariant(AS.begin()->getValue()))
434  continue;
435 
436  assert(
437  !AS.empty() &&
438  "Must alias set should have at least one pointer element in it!");
439 
440  SmallSetVector<Value *, 8> PointerMustAliases;
441  for (const auto &ASI : AS)
442  PointerMustAliases.insert(ASI.getValue());
443 
444  Promoted |= promoteLoopAccessesToScalars(
445  PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
446  DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
447  }
448 
449  // Once we have promoted values across the loop body we have to
450  // recursively reform LCSSA as any nested loop may now have values defined
451  // within the loop used in the outer loop.
452  // FIXME: This is really heavy handed. It would be a bit better to use an
453  // SSAUpdater strategy during promotion that was LCSSA aware and reformed
454  // it as it went.
455  if (Promoted)
456  formLCSSARecursively(*L, *DT, LI, SE);
457 
458  Changed |= Promoted;
459  }
460  }
461 
462  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
463  // specifically moving instructions across the loop boundary and so it is
464  // especially in need of sanity checking here.
465  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
466  assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
467  "Parent loop not left in LCSSA form after LICM!");
468 
469  // If this loop is nested inside of another one, save the alias information
470  // for when we process the outer loop.
471  if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST)
472  LoopToAliasSetMap[L] = std::move(CurAST);
473 
474  if (MSSAU.get() && VerifyMemorySSA)
475  MSSAU->getMemorySSA()->verifyMemorySSA();
476 
477  if (Changed && SE)
478  SE->forgetLoopDispositions(L);
479  return Changed;
480 }
481 
482 /// Walk the specified region of the CFG (defined by all blocks dominated by
483 /// the specified block, and that are in the current loop) in reverse depth
484 /// first order w.r.t the DominatorTree. This allows us to visit uses before
485 /// definitions, allowing us to sink a loop body in one pass without iteration.
486 ///
489  TargetTransformInfo *TTI, Loop *CurLoop,
490  AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
491  ICFLoopSafetyInfo *SafetyInfo,
492  SinkAndHoistLICMFlags &Flags,
494 
495  // Verify inputs.
496  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
497  CurLoop != nullptr && SafetyInfo != nullptr &&
498  "Unexpected input to sinkRegion.");
499  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
500  "Either AliasSetTracker or MemorySSA should be initialized.");
501 
502  // We want to visit children before parents. We will enque all the parents
503  // before their children in the worklist and process the worklist in reverse
504  // order.
506 
507  bool Changed = false;
508  for (DomTreeNode *DTN : reverse(Worklist)) {
509  BasicBlock *BB = DTN->getBlock();
510  // Only need to process the contents of this block if it is not part of a
511  // subloop (which would already have been processed).
512  if (inSubLoop(BB, CurLoop, LI))
513  continue;
514 
515  for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
516  Instruction &I = *--II;
517 
518  // If the instruction is dead, we would try to sink it because it isn't
519  // used in the loop, instead, just delete it.
520  if (isInstructionTriviallyDead(&I, TLI)) {
521  LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
522  salvageDebugInfo(I);
523  ++II;
524  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
525  Changed = true;
526  continue;
527  }
528 
529  // Check to see if we can sink this instruction to the exit blocks
530  // of the loop. We can do this if the all users of the instruction are
531  // outside of the loop. In this case, it doesn't even matter if the
532  // operands of the instruction are loop invariant.
533  //
534  bool FreeInLoop = false;
535  if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
536  canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
537  ORE) &&
538  !I.mayHaveSideEffects()) {
539  if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
540  if (!FreeInLoop) {
541  ++II;
542  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
543  }
544  Changed = true;
545  }
546  }
547  }
548  }
549  if (MSSAU && VerifyMemorySSA)
550  MSSAU->getMemorySSA()->verifyMemorySSA();
551  return Changed;
552 }
553 
554 namespace {
555 // This is a helper class for hoistRegion to make it able to hoist control flow
556 // in order to be able to hoist phis. The way this works is that we initially
557 // start hoisting to the loop preheader, and when we see a loop invariant branch
558 // we make note of this. When we then come to hoist an instruction that's
559 // conditional on such a branch we duplicate the branch and the relevant control
560 // flow, then hoist the instruction into the block corresponding to its original
561 // block in the duplicated control flow.
562 class ControlFlowHoister {
563 private:
564  // Information about the loop we are hoisting from
565  LoopInfo *LI;
566  DominatorTree *DT;
567  Loop *CurLoop;
568  MemorySSAUpdater *MSSAU;
569 
570  // A map of blocks in the loop to the block their instructions will be hoisted
571  // to.
572  DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
573 
574  // The branches that we can hoist, mapped to the block that marks a
575  // convergence point of their control flow.
576  DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
577 
578 public:
579  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
580  MemorySSAUpdater *MSSAU)
581  : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
582 
583  void registerPossiblyHoistableBranch(BranchInst *BI) {
584  // We can only hoist conditional branches with loop invariant operands.
585  if (!ControlFlowHoisting || !BI->isConditional() ||
586  !CurLoop->hasLoopInvariantOperands(BI))
587  return;
588 
589  // The branch destinations need to be in the loop, and we don't gain
590  // anything by duplicating conditional branches with duplicate successors,
591  // as it's essentially the same as an unconditional branch.
592  BasicBlock *TrueDest = BI->getSuccessor(0);
593  BasicBlock *FalseDest = BI->getSuccessor(1);
594  if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
595  TrueDest == FalseDest)
596  return;
597 
598  // We can hoist BI if one branch destination is the successor of the other,
599  // or both have common successor which we check by seeing if the
600  // intersection of their successors is non-empty.
601  // TODO: This could be expanded to allowing branches where both ends
602  // eventually converge to a single block.
603  SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
604  TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
605  FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
606  BasicBlock *CommonSucc = nullptr;
607  if (TrueDestSucc.count(FalseDest)) {
608  CommonSucc = FalseDest;
609  } else if (FalseDestSucc.count(TrueDest)) {
610  CommonSucc = TrueDest;
611  } else {
612  set_intersect(TrueDestSucc, FalseDestSucc);
613  // If there's one common successor use that.
614  if (TrueDestSucc.size() == 1)
615  CommonSucc = *TrueDestSucc.begin();
616  // If there's more than one pick whichever appears first in the block list
617  // (we can't use the value returned by TrueDestSucc.begin() as it's
618  // unpredicatable which element gets returned).
619  else if (!TrueDestSucc.empty()) {
620  Function *F = TrueDest->getParent();
621  auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
622  auto It = std::find_if(F->begin(), F->end(), IsSucc);
623  assert(It != F->end() && "Could not find successor in function");
624  CommonSucc = &*It;
625  }
626  }
627  // The common successor has to be dominated by the branch, as otherwise
628  // there will be some other path to the successor that will not be
629  // controlled by this branch so any phi we hoist would be controlled by the
630  // wrong condition. This also takes care of avoiding hoisting of loop back
631  // edges.
632  // TODO: In some cases this could be relaxed if the successor is dominated
633  // by another block that's been hoisted and we can guarantee that the
634  // control flow has been replicated exactly.
635  if (CommonSucc && DT->dominates(BI, CommonSucc))
636  HoistableBranches[BI] = CommonSucc;
637  }
638 
639  bool canHoistPHI(PHINode *PN) {
640  // The phi must have loop invariant operands.
641  if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
642  return false;
643  // We can hoist phis if the block they are in is the target of hoistable
644  // branches which cover all of the predecessors of the block.
645  SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
646  BasicBlock *BB = PN->getParent();
647  for (BasicBlock *PredBB : predecessors(BB))
648  PredecessorBlocks.insert(PredBB);
649  // If we have less predecessor blocks than predecessors then the phi will
650  // have more than one incoming value for the same block which we can't
651  // handle.
652  // TODO: This could be handled be erasing some of the duplicate incoming
653  // values.
654  if (PredecessorBlocks.size() != pred_size(BB))
655  return false;
656  for (auto &Pair : HoistableBranches) {
657  if (Pair.second == BB) {
658  // Which blocks are predecessors via this branch depends on if the
659  // branch is triangle-like or diamond-like.
660  if (Pair.first->getSuccessor(0) == BB) {
661  PredecessorBlocks.erase(Pair.first->getParent());
662  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
663  } else if (Pair.first->getSuccessor(1) == BB) {
664  PredecessorBlocks.erase(Pair.first->getParent());
665  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
666  } else {
667  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
668  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
669  }
670  }
671  }
672  // PredecessorBlocks will now be empty if for every predecessor of BB we
673  // found a hoistable branch source.
674  return PredecessorBlocks.empty();
675  }
676 
677  BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
678  if (!ControlFlowHoisting)
679  return CurLoop->getLoopPreheader();
680  // If BB has already been hoisted, return that
681  if (HoistDestinationMap.count(BB))
682  return HoistDestinationMap[BB];
683 
684  // Check if this block is conditional based on a pending branch
685  auto HasBBAsSuccessor =
687  return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
688  Pair.first->getSuccessor(1) == BB);
689  };
690  auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
691  HasBBAsSuccessor);
692 
693  // If not involved in a pending branch, hoist to preheader
694  BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
695  if (It == HoistableBranches.end()) {
696  LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
697  << " as hoist destination for " << BB->getName()
698  << "\n");
699  HoistDestinationMap[BB] = InitialPreheader;
700  return InitialPreheader;
701  }
702  BranchInst *BI = It->first;
703  assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
704  HoistableBranches.end() &&
705  "BB is expected to be the target of at most one branch");
706 
707  LLVMContext &C = BB->getContext();
708  BasicBlock *TrueDest = BI->getSuccessor(0);
709  BasicBlock *FalseDest = BI->getSuccessor(1);
710  BasicBlock *CommonSucc = HoistableBranches[BI];
711  BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
712 
713  // Create hoisted versions of blocks that currently don't have them
714  auto CreateHoistedBlock = [&](BasicBlock *Orig) {
715  if (HoistDestinationMap.count(Orig))
716  return HoistDestinationMap[Orig];
717  BasicBlock *New =
718  BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
719  HoistDestinationMap[Orig] = New;
720  DT->addNewBlock(New, HoistTarget);
721  if (CurLoop->getParentLoop())
722  CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
723  ++NumCreatedBlocks;
724  LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
725  << " as hoist destination for " << Orig->getName()
726  << "\n");
727  return New;
728  };
729  BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
730  BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
731  BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
732 
733  // Link up these blocks with branches.
734  if (!HoistCommonSucc->getTerminator()) {
735  // The new common successor we've generated will branch to whatever that
736  // hoist target branched to.
737  BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
738  assert(TargetSucc && "Expected hoist target to have a single successor");
739  HoistCommonSucc->moveBefore(TargetSucc);
740  BranchInst::Create(TargetSucc, HoistCommonSucc);
741  }
742  if (!HoistTrueDest->getTerminator()) {
743  HoistTrueDest->moveBefore(HoistCommonSucc);
744  BranchInst::Create(HoistCommonSucc, HoistTrueDest);
745  }
746  if (!HoistFalseDest->getTerminator()) {
747  HoistFalseDest->moveBefore(HoistCommonSucc);
748  BranchInst::Create(HoistCommonSucc, HoistFalseDest);
749  }
750 
751  // If BI is being cloned to what was originally the preheader then
752  // HoistCommonSucc will now be the new preheader.
753  if (HoistTarget == InitialPreheader) {
754  // Phis in the loop header now need to use the new preheader.
755  InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
756  if (MSSAU)
758  HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
759  // The new preheader dominates the loop header.
760  DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
761  DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
762  DT->changeImmediateDominator(HeaderNode, PreheaderNode);
763  // The preheader hoist destination is now the new preheader, with the
764  // exception of the hoist destination of this branch.
765  for (auto &Pair : HoistDestinationMap)
766  if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
767  Pair.second = HoistCommonSucc;
768  }
769 
770  // Now finally clone BI.
772  HoistTarget->getTerminator(),
773  BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
774  ++NumClonedBranches;
775 
776  assert(CurLoop->getLoopPreheader() &&
777  "Hoisting blocks should not have destroyed preheader");
778  return HoistDestinationMap[BB];
779  }
780 };
781 } // namespace
782 
783 /// Walk the specified region of the CFG (defined by all blocks dominated by
784 /// the specified block, and that are in the current loop) in depth first
785 /// order w.r.t the DominatorTree. This allows us to visit definitions before
786 /// uses, allowing us to hoist a loop body in one pass without iteration.
787 ///
789  DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
790  AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
791  ICFLoopSafetyInfo *SafetyInfo,
792  SinkAndHoistLICMFlags &Flags,
794  // Verify inputs.
795  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
796  CurLoop != nullptr && SafetyInfo != nullptr &&
797  "Unexpected input to hoistRegion.");
798  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
799  "Either AliasSetTracker or MemorySSA should be initialized.");
800 
801  ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
802 
803  // Keep track of instructions that have been hoisted, as they may need to be
804  // re-hoisted if they end up not dominating all of their uses.
805  SmallVector<Instruction *, 16> HoistedInstructions;
806 
807  // For PHI hoisting to work we need to hoist blocks before their successors.
808  // We can do this by iterating through the blocks in the loop in reverse
809  // post-order.
810  LoopBlocksRPO Worklist(CurLoop);
811  Worklist.perform(LI);
812  bool Changed = false;
813  for (BasicBlock *BB : Worklist) {
814  // Only need to process the contents of this block if it is not part of a
815  // subloop (which would already have been processed).
816  if (inSubLoop(BB, CurLoop, LI))
817  continue;
818 
819  for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
820  Instruction &I = *II++;
821  // Try constant folding this instruction. If all the operands are
822  // constants, it is technically hoistable, but it would be better to
823  // just fold it.
825  &I, I.getModule()->getDataLayout(), TLI)) {
826  LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
827  << '\n');
828  if (CurAST)
829  CurAST->copyValue(&I, C);
830  // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
832  if (isInstructionTriviallyDead(&I, TLI))
833  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
834  Changed = true;
835  continue;
836  }
837 
838  // Try hoisting the instruction out to the preheader. We can only do
839  // this if all of the operands of the instruction are loop invariant and
840  // if it is safe to hoist the instruction.
841  // TODO: It may be safe to hoist if we are hoisting to a conditional block
842  // and we have accurately duplicated the control flow from the loop header
843  // to that block.
844  if (CurLoop->hasLoopInvariantOperands(&I) &&
845  canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
846  ORE) &&
848  I, DT, CurLoop, SafetyInfo, ORE,
849  CurLoop->getLoopPreheader()->getTerminator())) {
850  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
851  MSSAU, ORE);
852  HoistedInstructions.push_back(&I);
853  Changed = true;
854  continue;
855  }
856 
857  // Attempt to remove floating point division out of the loop by
858  // converting it to a reciprocal multiplication.
859  if (I.getOpcode() == Instruction::FDiv &&
860  CurLoop->isLoopInvariant(I.getOperand(1)) &&
861  I.hasAllowReciprocal()) {
862  auto Divisor = I.getOperand(1);
863  auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
864  auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
865  ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
866  SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
867  ReciprocalDivisor->insertBefore(&I);
868 
869  auto Product =
870  BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
871  Product->setFastMathFlags(I.getFastMathFlags());
872  SafetyInfo->insertInstructionTo(Product, I.getParent());
873  Product->insertAfter(&I);
874  I.replaceAllUsesWith(Product);
875  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
876 
877  hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
878  SafetyInfo, MSSAU, ORE);
879  HoistedInstructions.push_back(ReciprocalDivisor);
880  Changed = true;
881  continue;
882  }
883 
884  auto IsInvariantStart = [&](Instruction &I) {
885  using namespace PatternMatch;
886  return I.use_empty() &&
887  match(&I, m_Intrinsic<Intrinsic::invariant_start>());
888  };
889  auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
890  return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
891  SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
892  };
893  if ((IsInvariantStart(I) || isGuard(&I)) &&
894  CurLoop->hasLoopInvariantOperands(&I) &&
895  MustExecuteWithoutWritesBefore(I)) {
896  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
897  MSSAU, ORE);
898  HoistedInstructions.push_back(&I);
899  Changed = true;
900  continue;
901  }
902 
903  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
904  if (CFH.canHoistPHI(PN)) {
905  // Redirect incoming blocks first to ensure that we create hoisted
906  // versions of those blocks before we hoist the phi.
907  for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
908  PN->setIncomingBlock(
909  i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
910  hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
911  MSSAU, ORE);
912  assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
913  Changed = true;
914  continue;
915  }
916  }
917 
918  // Remember possibly hoistable branches so we can actually hoist them
919  // later if needed.
920  if (BranchInst *BI = dyn_cast<BranchInst>(&I))
921  CFH.registerPossiblyHoistableBranch(BI);
922  }
923  }
924 
925  // If we hoisted instructions to a conditional block they may not dominate
926  // their uses that weren't hoisted (such as phis where some operands are not
927  // loop invariant). If so make them unconditional by moving them to their
928  // immediate dominator. We iterate through the instructions in reverse order
929  // which ensures that when we rehoist an instruction we rehoist its operands,
930  // and also keep track of where in the block we are rehoisting to to make sure
931  // that we rehoist instructions before the instructions that use them.
932  Instruction *HoistPoint = nullptr;
933  if (ControlFlowHoisting) {
934  for (Instruction *I : reverse(HoistedInstructions)) {
935  if (!llvm::all_of(I->uses(),
936  [&](Use &U) { return DT->dominates(I, U); })) {
937  BasicBlock *Dominator =
938  DT->getNode(I->getParent())->getIDom()->getBlock();
939  if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
940  if (HoistPoint)
941  assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
942  "New hoist point expected to dominate old hoist point");
943  HoistPoint = Dominator->getTerminator();
944  }
945  LLVM_DEBUG(dbgs() << "LICM rehoisting to "
946  << HoistPoint->getParent()->getName()
947  << ": " << *I << "\n");
948  moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU);
949  HoistPoint = I;
950  Changed = true;
951  }
952  }
953  }
954  if (MSSAU && VerifyMemorySSA)
955  MSSAU->getMemorySSA()->verifyMemorySSA();
956 
957  // Now that we've finished hoisting make sure that LI and DT are still
958  // valid.
959 #ifndef NDEBUG
960  if (Changed) {
962  "Dominator tree verification failed");
963  LI->verify(*DT);
964  }
965 #endif
966 
967  return Changed;
968 }
969 
970 // Return true if LI is invariant within scope of the loop. LI is invariant if
971 // CurLoop is dominated by an invariant.start representing the same memory
972 // location and size as the memory location LI loads from, and also the
973 // invariant.start has no uses.
975  Loop *CurLoop) {
976  Value *Addr = LI->getOperand(0);
977  const DataLayout &DL = LI->getModule()->getDataLayout();
978  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
979 
980  // if the type is i8 addrspace(x)*, we know this is the type of
981  // llvm.invariant.start operand
982  auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
983  LI->getPointerAddressSpace());
984  unsigned BitcastsVisited = 0;
985  // Look through bitcasts until we reach the i8* type (this is invariant.start
986  // operand type).
987  while (Addr->getType() != PtrInt8Ty) {
988  auto *BC = dyn_cast<BitCastInst>(Addr);
989  // Avoid traversing high number of bitcast uses.
990  if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
991  return false;
992  Addr = BC->getOperand(0);
993  }
994 
995  unsigned UsesVisited = 0;
996  // Traverse all uses of the load operand value, to see if invariant.start is
997  // one of the uses, and whether it dominates the load instruction.
998  for (auto *U : Addr->users()) {
999  // Avoid traversing for Load operand with high number of users.
1000  if (++UsesVisited > MaxNumUsesTraversed)
1001  return false;
1003  // If there are escaping uses of invariant.start instruction, the load maybe
1004  // non-invariant.
1005  if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
1006  !II->use_empty())
1007  continue;
1008  unsigned InvariantSizeInBits =
1009  cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
1010  // Confirm the invariant.start location size contains the load operand size
1011  // in bits. Also, the invariant.start should dominate the load, and we
1012  // should not hoist the load out of a loop that contains this dominating
1013  // invariant.start.
1014  if (LocSizeInBits <= InvariantSizeInBits &&
1015  DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
1016  return true;
1017  }
1018 
1019  return false;
1020 }
1021 
1022 namespace {
1023 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1024 /// sink a given instruction out of a loop. Does not address legality
1025 /// concerns such as aliasing or speculation safety.
1026 bool isHoistableAndSinkableInst(Instruction &I) {
1027  // Only these instructions are hoistable/sinkable.
1028  return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
1029  isa<FenceInst>(I) || isa<BinaryOperator>(I) || isa<CastInst>(I) ||
1030  isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
1031  isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
1032  isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
1033  isa<InsertValueInst>(I));
1034 }
1035 /// Return true if all of the alias sets within this AST are known not to
1036 /// contain a Mod, or if MSSA knows thare are no MemoryDefs in the loop.
1037 bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
1038  const Loop *L) {
1039  if (CurAST) {
1040  for (AliasSet &AS : *CurAST) {
1041  if (!AS.isForwardingAliasSet() && AS.isMod()) {
1042  return false;
1043  }
1044  }
1045  return true;
1046  } else { /*MSSAU*/
1047  for (auto *BB : L->getBlocks())
1048  if (MSSAU->getMemorySSA()->getBlockDefs(BB))
1049  return false;
1050  return true;
1051  }
1052 }
1053 
1054 /// Return true if I is the only Instruction with a MemoryAccess in L.
1055 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1056  const MemorySSAUpdater *MSSAU) {
1057  for (auto *BB : L->getBlocks())
1058  if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
1059  int NotAPhi = 0;
1060  for (const auto &Acc : *Accs) {
1061  if (isa<MemoryPhi>(&Acc))
1062  continue;
1063  const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1064  if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1065  return false;
1066  }
1067  }
1068  return true;
1069 }
1070 }
1071 
1073  Loop *CurLoop, AliasSetTracker *CurAST,
1074  MemorySSAUpdater *MSSAU,
1075  bool TargetExecutesOncePerLoop,
1076  SinkAndHoistLICMFlags *Flags,
1078  // If we don't understand the instruction, bail early.
1079  if (!isHoistableAndSinkableInst(I))
1080  return false;
1081 
1082  MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
1083  if (MSSA)
1084  assert(Flags != nullptr && "Flags cannot be null.");
1085 
1086  // Loads have extra constraints we have to verify before we can hoist them.
1087  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1088  if (!LI->isUnordered())
1089  return false; // Don't sink/hoist volatile or ordered atomic loads!
1090 
1091  // Loads from constant memory are always safe to move, even if they end up
1092  // in the same alias set as something that ends up being modified.
1093  if (AA->pointsToConstantMemory(LI->getOperand(0)))
1094  return true;
1095  if (LI->getMetadata(LLVMContext::MD_invariant_load))
1096  return true;
1097 
1098  if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1099  return false; // Don't risk duplicating unordered loads
1100 
1101  // This checks for an invariant.start dominating the load.
1102  if (isLoadInvariantInLoop(LI, DT, CurLoop))
1103  return true;
1104 
1105  bool Invalidated;
1106  if (CurAST)
1107  Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
1108  CurLoop, AA);
1109  else
1110  Invalidated = pointerInvalidatedByLoopWithMSSA(
1111  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags);
1112  // Check loop-invariant address because this may also be a sinkable load
1113  // whose address is not necessarily loop-invariant.
1114  if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1115  ORE->emit([&]() {
1116  return OptimizationRemarkMissed(
1117  DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1118  << "failed to move load with loop-invariant address "
1119  "because the loop may invalidate its value";
1120  });
1121 
1122  return !Invalidated;
1123  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1124  // Don't sink or hoist dbg info; it's legal, but not useful.
1125  if (isa<DbgInfoIntrinsic>(I))
1126  return false;
1127 
1128  // Don't sink calls which can throw.
1129  if (CI->mayThrow())
1130  return false;
1131 
1132  using namespace PatternMatch;
1133  if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1134  // Assumes don't actually alias anything or throw
1135  return true;
1136 
1137  // Handle simple cases by querying alias analysis.
1138  FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
1139  if (Behavior == FMRB_DoesNotAccessMemory)
1140  return true;
1141  if (AliasAnalysis::onlyReadsMemory(Behavior)) {
1142  // A readonly argmemonly function only reads from memory pointed to by
1143  // it's arguments with arbitrary offsets. If we can prove there are no
1144  // writes to this memory in the loop, we can hoist or sink.
1146  // TODO: expand to writeable arguments
1147  for (Value *Op : CI->arg_operands())
1148  if (Op->getType()->isPointerTy()) {
1149  bool Invalidated;
1150  if (CurAST)
1151  Invalidated = pointerInvalidatedByLoop(
1153  CurAST, CurLoop, AA);
1154  else
1155  Invalidated = pointerInvalidatedByLoopWithMSSA(
1156  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
1157  *Flags);
1158  if (Invalidated)
1159  return false;
1160  }
1161  return true;
1162  }
1163 
1164  // If this call only reads from memory and there are no writes to memory
1165  // in the loop, we can hoist or sink the call as appropriate.
1166  if (isReadOnly(CurAST, MSSAU, CurLoop))
1167  return true;
1168  }
1169 
1170  // FIXME: This should use mod/ref information to see if we can hoist or
1171  // sink the call.
1172 
1173  return false;
1174  } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1175  // Fences alias (most) everything to provide ordering. For the moment,
1176  // just give up if there are any other memory operations in the loop.
1177  if (CurAST) {
1178  auto Begin = CurAST->begin();
1179  assert(Begin != CurAST->end() && "must contain FI");
1180  if (std::next(Begin) != CurAST->end())
1181  // constant memory for instance, TODO: handle better
1182  return false;
1183  auto *UniqueI = Begin->getUniqueInstruction();
1184  if (!UniqueI)
1185  // other memory op, give up
1186  return false;
1187  (void)FI; // suppress unused variable warning
1188  assert(UniqueI == FI && "AS must contain FI");
1189  return true;
1190  } else // MSSAU
1191  return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1192  } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1193  if (!SI->isUnordered())
1194  return false; // Don't sink/hoist volatile or ordered atomic store!
1195 
1196  // We can only hoist a store that we can prove writes a value which is not
1197  // read or overwritten within the loop. For those cases, we fallback to
1198  // load store promotion instead. TODO: We can extend this to cases where
1199  // there is exactly one write to the location and that write dominates an
1200  // arbitrary number of reads in the loop.
1201  if (CurAST) {
1202  auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
1203 
1204  if (AS.isRef() || !AS.isMustAlias())
1205  // Quick exit test, handled by the full path below as well.
1206  return false;
1207  auto *UniqueI = AS.getUniqueInstruction();
1208  if (!UniqueI)
1209  // other memory op, give up
1210  return false;
1211  assert(UniqueI == SI && "AS must contain SI");
1212  return true;
1213  } else { // MSSAU
1214  if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1215  return true;
1216  // If there are more accesses than the Promotion cap, give up, we're not
1217  // walking a list that long.
1218  if (Flags->NoOfMemAccTooLarge)
1219  return false;
1220  // Check store only if there's still "quota" to check clobber.
1221  if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap)
1222  return false;
1223  // If there are interfering Uses (i.e. their defining access is in the
1224  // loop), or ordered loads (stored as Defs!), don't move this store.
1225  // Could do better here, but this is conservatively correct.
1226  // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1227  // moving accesses. Can also extend to dominating uses.
1228  auto *SIMD = MSSA->getMemoryAccess(SI);
1229  for (auto *BB : CurLoop->getBlocks())
1230  if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
1231  for (const auto &MA : *Accesses)
1232  if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1233  auto *MD = MU->getDefiningAccess();
1234  if (!MSSA->isLiveOnEntryDef(MD) &&
1235  CurLoop->contains(MD->getBlock()))
1236  return false;
1237  // Disable hoisting past potentially interfering loads. Optimized
1238  // Uses may point to an access outside the loop, as getClobbering
1239  // checks the previous iteration when walking the backedge.
1240  // FIXME: More precise: no Uses that alias SI.
1241  if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
1242  return false;
1243  } else if (const auto *MD = dyn_cast<MemoryDef>(&MA))
1244  if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
1245  (void)LI; // Silence warning.
1246  assert(!LI->isUnordered() && "Expected unordered load");
1247  return false;
1248  }
1249  }
1250 
1252  Flags->LicmMssaOptCounter++;
1253  // If there are no clobbering Defs in the loop, store is safe to hoist.
1254  return MSSA->isLiveOnEntryDef(Source) ||
1255  !CurLoop->contains(Source->getBlock());
1256  }
1257  }
1258 
1259  assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1260 
1261  // We've established mechanical ability and aliasing, it's up to the caller
1262  // to check fault safety
1263  return true;
1264 }
1265 
1266 /// Returns true if a PHINode is a trivially replaceable with an
1267 /// Instruction.
1268 /// This is true when all incoming values are that instruction.
1269 /// This pattern occurs most often with LCSSA PHI nodes.
1270 ///
1271 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1272  for (const Value *IncValue : PN.incoming_values())
1273  if (IncValue != &I)
1274  return false;
1275 
1276  return true;
1277 }
1278 
1279 /// Return true if the instruction is free in the loop.
1280 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1281  const TargetTransformInfo *TTI) {
1282 
1283  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1285  return false;
1286  // For a GEP, we cannot simply use getUserCost because currently it
1287  // optimistically assume that a GEP will fold into addressing mode
1288  // regardless of its users.
1289  const BasicBlock *BB = GEP->getParent();
1290  for (const User *U : GEP->users()) {
1291  const Instruction *UI = cast<Instruction>(U);
1292  if (CurLoop->contains(UI) &&
1293  (BB != UI->getParent() ||
1294  (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1295  return false;
1296  }
1297  return true;
1298  } else
1299  return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
1300 }
1301 
1302 /// Return true if the only users of this instruction are outside of
1303 /// the loop. If this is true, we can sink the instruction to the exit
1304 /// blocks of the loop.
1305 ///
1306 /// We also return true if the instruction could be folded away in lowering.
1307 /// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1308 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1309  const LoopSafetyInfo *SafetyInfo,
1310  TargetTransformInfo *TTI, bool &FreeInLoop) {
1311  const auto &BlockColors = SafetyInfo->getBlockColors();
1312  bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1313  for (const User *U : I.users()) {
1314  const Instruction *UI = cast<Instruction>(U);
1315  if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1316  const BasicBlock *BB = PN->getParent();
1317  // We cannot sink uses in catchswitches.
1318  if (isa<CatchSwitchInst>(BB->getTerminator()))
1319  return false;
1320 
1321  // We need to sink a callsite to a unique funclet. Avoid sinking if the
1322  // phi use is too muddled.
1323  if (isa<CallInst>(I))
1324  if (!BlockColors.empty() &&
1325  BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1326  return false;
1327  }
1328 
1329  if (CurLoop->contains(UI)) {
1330  if (IsFree) {
1331  FreeInLoop = true;
1332  continue;
1333  }
1334  return false;
1335  }
1336  }
1337  return true;
1338 }
1339 
1341  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1342  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
1343  Instruction *New;
1344  if (auto *CI = dyn_cast<CallInst>(&I)) {
1345  const auto &BlockColors = SafetyInfo->getBlockColors();
1346 
1347  // Sinking call-sites need to be handled differently from other
1348  // instructions. The cloned call-site needs a funclet bundle operand
1349  // appropriate for its location in the CFG.
1351  for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1352  BundleIdx != BundleEnd; ++BundleIdx) {
1353  OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1354  if (Bundle.getTagID() == LLVMContext::OB_funclet)
1355  continue;
1356 
1357  OpBundles.emplace_back(Bundle);
1358  }
1359 
1360  if (!BlockColors.empty()) {
1361  const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1362  assert(CV.size() == 1 && "non-unique color for exit block!");
1363  BasicBlock *BBColor = CV.front();
1364  Instruction *EHPad = BBColor->getFirstNonPHI();
1365  if (EHPad->isEHPad())
1366  OpBundles.emplace_back("funclet", EHPad);
1367  }
1368 
1369  New = CallInst::Create(CI, OpBundles);
1370  } else {
1371  New = I.clone();
1372  }
1373 
1374  ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
1375  if (!I.getName().empty())
1376  New->setName(I.getName() + ".le");
1377 
1378  MemoryAccess *OldMemAcc;
1379  if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) {
1380  // Create a new MemoryAccess and let MemorySSA set its defining access.
1381  MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1382  New, nullptr, New->getParent(), MemorySSA::Beginning);
1383  if (NewMemAcc) {
1384  if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1385  MSSAU->insertDef(MemDef, /*RenameUses=*/true);
1386  else {
1387  auto *MemUse = cast<MemoryUse>(NewMemAcc);
1388  MSSAU->insertUse(MemUse);
1389  }
1390  }
1391  }
1392 
1393  // Build LCSSA PHI nodes for any in-loop operands. Note that this is
1394  // particularly cheap because we can rip off the PHI node that we're
1395  // replacing for the number and blocks of the predecessors.
1396  // OPT: If this shows up in a profile, we can instead finish sinking all
1397  // invariant instructions, and then walk their operands to re-establish
1398  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1399  // sinking bottom-up.
1400  for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
1401  ++OI)
1402  if (Instruction *OInst = dyn_cast<Instruction>(*OI))
1403  if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
1404  if (!OLoop->contains(&PN)) {
1405  PHINode *OpPN =
1406  PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1407  OInst->getName() + ".lcssa", &ExitBlock.front());
1408  for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1409  OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1410  *OI = OpPN;
1411  }
1412  return New;
1413 }
1414 
1416  AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
1417  if (AST)
1418  AST->deleteValue(&I);
1419  if (MSSAU)
1420  MSSAU->removeMemoryAccess(&I);
1421  SafetyInfo.removeInstruction(&I);
1422  I.eraseFromParent();
1423 }
1424 
1426  ICFLoopSafetyInfo &SafetyInfo,
1427  MemorySSAUpdater *MSSAU) {
1428  SafetyInfo.removeInstruction(&I);
1429  SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1430  I.moveBefore(&Dest);
1431  if (MSSAU)
1432  if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1433  MSSAU->getMemorySSA()->getMemoryAccess(&I)))
1434  MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::End);
1435 }
1436 
1438  PHINode *TPN, Instruction *I, LoopInfo *LI,
1440  const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1441  MemorySSAUpdater *MSSAU) {
1442  assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1443  "Expect only trivially replaceable PHI");
1444  BasicBlock *ExitBlock = TPN->getParent();
1445  Instruction *New;
1446  auto It = SunkCopies.find(ExitBlock);
1447  if (It != SunkCopies.end())
1448  New = It->second;
1449  else
1450  New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(
1451  *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1452  return New;
1453 }
1454 
1455 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1456  BasicBlock *BB = PN->getParent();
1457  if (!BB->canSplitPredecessors())
1458  return false;
1459  // It's not impossible to split EHPad blocks, but if BlockColors already exist
1460  // it require updating BlockColors for all offspring blocks accordingly. By
1461  // skipping such corner case, we can make updating BlockColors after splitting
1462  // predecessor fairly simple.
1463  if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1464  return false;
1465  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
1466  BasicBlock *BBPred = *PI;
1467  if (isa<IndirectBrInst>(BBPred->getTerminator()))
1468  return false;
1469  }
1470  return true;
1471 }
1472 
1474  LoopInfo *LI, const Loop *CurLoop,
1475  LoopSafetyInfo *SafetyInfo,
1476  MemorySSAUpdater *MSSAU) {
1477 #ifndef NDEBUG
1478  SmallVector<BasicBlock *, 32> ExitBlocks;
1479  CurLoop->getUniqueExitBlocks(ExitBlocks);
1480  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1481  ExitBlocks.end());
1482 #endif
1483  BasicBlock *ExitBB = PN->getParent();
1484  assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1485 
1486  // Split predecessors of the loop exit to make instructions in the loop are
1487  // exposed to exit blocks through trivially replaceable PHIs while keeping the
1488  // loop in the canonical form where each predecessor of each exit block should
1489  // be contained within the loop. For example, this will convert the loop below
1490  // from
1491  //
1492  // LB1:
1493  // %v1 =
1494  // br %LE, %LB2
1495  // LB2:
1496  // %v2 =
1497  // br %LE, %LB1
1498  // LE:
1499  // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1500  //
1501  // to
1502  //
1503  // LB1:
1504  // %v1 =
1505  // br %LE.split, %LB2
1506  // LB2:
1507  // %v2 =
1508  // br %LE.split2, %LB1
1509  // LE.split:
1510  // %p1 = phi [%v1, %LB1] <-- trivially replaceable
1511  // br %LE
1512  // LE.split2:
1513  // %p2 = phi [%v2, %LB2] <-- trivially replaceable
1514  // br %LE
1515  // LE:
1516  // %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1517  //
1518  const auto &BlockColors = SafetyInfo->getBlockColors();
1519  SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1520  while (!PredBBs.empty()) {
1521  BasicBlock *PredBB = *PredBBs.begin();
1522  assert(CurLoop->contains(PredBB) &&
1523  "Expect all predecessors are in the loop");
1524  if (PN->getBasicBlockIndex(PredBB) >= 0) {
1526  ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1527  // Since we do not allow splitting EH-block with BlockColors in
1528  // canSplitPredecessors(), we can simply assign predecessor's color to
1529  // the new block.
1530  if (!BlockColors.empty())
1531  // Grab a reference to the ColorVector to be inserted before getting the
1532  // reference to the vector we are copying because inserting the new
1533  // element in BlockColors might cause the map to be reallocated.
1534  SafetyInfo->copyColors(NewPred, PredBB);
1535  }
1536  PredBBs.remove(PredBB);
1537  }
1538 }
1539 
1540 /// When an instruction is found to only be used outside of the loop, this
1541 /// function moves it to the exit blocks and patches up SSA form as needed.
1542 /// This method is guaranteed to remove the original instruction from its
1543 /// position, and may either delete it or move it to outside of the loop.
1544 ///
1545 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1546  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
1548  LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1549  ORE->emit([&]() {
1550  return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1551  << "sinking " << ore::NV("Inst", &I);
1552  });
1553  bool Changed = false;
1554  if (isa<LoadInst>(I))
1555  ++NumMovedLoads;
1556  else if (isa<CallInst>(I))
1557  ++NumMovedCalls;
1558  ++NumSunk;
1559 
1560  // Iterate over users to be ready for actual sinking. Replace users via
1561  // unreachable blocks with undef and make all user PHIs trivially replaceable.
1562  SmallPtrSet<Instruction *, 8> VisitedUsers;
1563  for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1564  auto *User = cast<Instruction>(*UI);
1565  Use &U = UI.getUse();
1566  ++UI;
1567 
1568  if (VisitedUsers.count(User) || CurLoop->contains(User))
1569  continue;
1570 
1571  if (!DT->isReachableFromEntry(User->getParent())) {
1572  U = UndefValue::get(I.getType());
1573  Changed = true;
1574  continue;
1575  }
1576 
1577  // The user must be a PHI node.
1578  PHINode *PN = cast<PHINode>(User);
1579 
1580  // Surprisingly, instructions can be used outside of loops without any
1581  // exits. This can only happen in PHI nodes if the incoming block is
1582  // unreachable.
1583  BasicBlock *BB = PN->getIncomingBlock(U);
1584  if (!DT->isReachableFromEntry(BB)) {
1585  U = UndefValue::get(I.getType());
1586  Changed = true;
1587  continue;
1588  }
1589 
1590  VisitedUsers.insert(PN);
1591  if (isTriviallyReplaceablePHI(*PN, I))
1592  continue;
1593 
1594  if (!canSplitPredecessors(PN, SafetyInfo))
1595  return Changed;
1596 
1597  // Split predecessors of the PHI so that we can make users trivially
1598  // replaceable.
1599  splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
1600 
1601  // Should rebuild the iterators, as they may be invalidated by
1602  // splitPredecessorsOfLoopExit().
1603  UI = I.user_begin();
1604  UE = I.user_end();
1605  }
1606 
1607  if (VisitedUsers.empty())
1608  return Changed;
1609 
1610 #ifndef NDEBUG
1611  SmallVector<BasicBlock *, 32> ExitBlocks;
1612  CurLoop->getUniqueExitBlocks(ExitBlocks);
1613  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1614  ExitBlocks.end());
1615 #endif
1616 
1617  // Clones of this instruction. Don't create more than one per exit block!
1619 
1620  // If this instruction is only used outside of the loop, then all users are
1621  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1622  // the instruction.
1624  for (auto *UI : Users) {
1625  auto *User = cast<Instruction>(UI);
1626 
1627  if (CurLoop->contains(User))
1628  continue;
1629 
1630  PHINode *PN = cast<PHINode>(User);
1631  assert(ExitBlockSet.count(PN->getParent()) &&
1632  "The LCSSA PHI is not in an exit block!");
1633  // The PHI must be trivially replaceable.
1635  PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1636  PN->replaceAllUsesWith(New);
1637  eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
1638  Changed = true;
1639  }
1640  return Changed;
1641 }
1642 
1643 /// When an instruction is found to only use loop invariant operands that
1644 /// is safe to hoist, this instruction is called to do the dirty work.
1645 ///
1646 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1647  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1649  LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
1650  << "\n");
1651  ORE->emit([&]() {
1652  return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1653  << ore::NV("Inst", &I);
1654  });
1655 
1656  // Metadata can be dependent on conditions we are hoisting above.
1657  // Conservatively strip all metadata on the instruction unless we were
1658  // guaranteed to execute I if we entered the loop, in which case the metadata
1659  // is valid in the loop preheader.
1660  if (I.hasMetadataOtherThanDebugLoc() &&
1661  // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1662  // time in isGuaranteedToExecute if we don't actually have anything to
1663  // drop. It is a compile time optimization, not required for correctness.
1664  !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1666 
1667  if (isa<PHINode>(I))
1668  // Move the new node to the end of the phi list in the destination block.
1669  moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU);
1670  else
1671  // Move the new node to the destination block, before its terminator.
1672  moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU);
1673 
1674  // Apply line 0 debug locations when we are moving instructions to different
1675  // basic blocks because we want to avoid jumpy line tables.
1676  if (const DebugLoc &DL = I.getDebugLoc())
1677  I.setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
1678 
1679  if (isa<LoadInst>(I))
1680  ++NumMovedLoads;
1681  else if (isa<CallInst>(I))
1682  ++NumMovedCalls;
1683  ++NumHoisted;
1684 }
1685 
1686 /// Only sink or hoist an instruction if it is not a trapping instruction,
1687 /// or if the instruction is known not to trap when moved to the preheader.
1688 /// or if it is a trapping instruction and is guaranteed to execute.
1690  const DominatorTree *DT,
1691  const Loop *CurLoop,
1692  const LoopSafetyInfo *SafetyInfo,
1694  const Instruction *CtxI) {
1695  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
1696  return true;
1697 
1698  bool GuaranteedToExecute =
1699  SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1700 
1701  if (!GuaranteedToExecute) {
1702  auto *LI = dyn_cast<LoadInst>(&Inst);
1703  if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1704  ORE->emit([&]() {
1705  return OptimizationRemarkMissed(
1706  DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1707  << "failed to hoist load with loop-invariant address "
1708  "because load is conditionally executed";
1709  });
1710  }
1711 
1712  return GuaranteedToExecute;
1713 }
1714 
1715 namespace {
1716 class LoopPromoter : public LoadAndStorePromoter {
1717  Value *SomePtr; // Designated pointer to store to.
1718  const SmallSetVector<Value *, 8> &PointerMustAliases;
1719  SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1720  SmallVectorImpl<Instruction *> &LoopInsertPts;
1721  SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1722  PredIteratorCache &PredCache;
1723  AliasSetTracker &AST;
1724  MemorySSAUpdater *MSSAU;
1725  LoopInfo &LI;
1726  DebugLoc DL;
1727  int Alignment;
1728  bool UnorderedAtomic;
1729  AAMDNodes AATags;
1730  ICFLoopSafetyInfo &SafetyInfo;
1731 
1732  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1733  if (Instruction *I = dyn_cast<Instruction>(V))
1734  if (Loop *L = LI.getLoopFor(I->getParent()))
1735  if (!L->contains(BB)) {
1736  // We need to create an LCSSA PHI node for the incoming value and
1737  // store that.
1738  PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1739  I->getName() + ".lcssa", &BB->front());
1740  for (BasicBlock *Pred : PredCache.get(BB))
1741  PN->addIncoming(I, Pred);
1742  return PN;
1743  }
1744  return V;
1745  }
1746 
1747 public:
1748  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1749  const SmallSetVector<Value *, 8> &PMA,
1753  AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
1754  DebugLoc dl, int alignment, bool UnorderedAtomic,
1755  const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
1756  : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
1757  LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
1758  PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
1759  Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1760  SafetyInfo(SafetyInfo) {}
1761 
1762  bool isInstInList(Instruction *I,
1763  const SmallVectorImpl<Instruction *> &) const override {
1764  Value *Ptr;
1765  if (LoadInst *LI = dyn_cast<LoadInst>(I))
1766  Ptr = LI->getOperand(0);
1767  else
1768  Ptr = cast<StoreInst>(I)->getPointerOperand();
1769  return PointerMustAliases.count(Ptr);
1770  }
1771 
1772  void doExtraRewritesBeforeFinalDeletion() override {
1773  // Insert stores after in the loop exit blocks. Each exit block gets a
1774  // store of the live-out values that feed them. Since we've already told
1775  // the SSA updater about the defs in the loop and the preheader
1776  // definition, it is all set and we can start using it.
1777  for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1778  BasicBlock *ExitBlock = LoopExitBlocks[i];
1779  Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1780  LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1781  Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1782  Instruction *InsertPos = LoopInsertPts[i];
1783  StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1784  if (UnorderedAtomic)
1786  NewSI->setAlignment(Alignment);
1787  NewSI->setDebugLoc(DL);
1788  if (AATags)
1789  NewSI->setAAMetadata(AATags);
1790 
1791  if (MSSAU) {
1792  MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1793  MemoryAccess *NewMemAcc;
1794  if (!MSSAInsertPoint) {
1795  NewMemAcc = MSSAU->createMemoryAccessInBB(
1796  NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1797  } else {
1798  NewMemAcc =
1799  MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1800  }
1801  MSSAInsertPts[i] = NewMemAcc;
1802  MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1803  // FIXME: true for safety, false may still be correct.
1804  }
1805  }
1806  }
1807 
1808  void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
1809  // Update alias analysis.
1810  AST.copyValue(LI, V);
1811  }
1812  void instructionDeleted(Instruction *I) const override {
1813  SafetyInfo.removeInstruction(I);
1814  AST.deleteValue(I);
1815  if (MSSAU)
1816  MSSAU->removeMemoryAccess(I);
1817  }
1818 };
1819 
1820 
1821 /// Return true iff we can prove that a caller of this function can not inspect
1822 /// the contents of the provided object in a well defined program.
1823 bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
1824  if (isa<AllocaInst>(Object))
1825  // Since the alloca goes out of scope, we know the caller can't retain a
1826  // reference to it and be well defined. Thus, we don't need to check for
1827  // capture.
1828  return true;
1829 
1830  // For all other objects we need to know that the caller can't possibly
1831  // have gotten a reference to the object. There are two components of
1832  // that:
1833  // 1) Object can't be escaped by this function. This is what
1834  // PointerMayBeCaptured checks.
1835  // 2) Object can't have been captured at definition site. For this, we
1836  // need to know the return value is noalias. At the moment, we use a
1837  // weaker condition and handle only AllocLikeFunctions (which are
1838  // known to be noalias). TODO
1839  return isAllocLikeFn(Object, TLI) &&
1840  !PointerMayBeCaptured(Object, true, true);
1841 }
1842 
1843 } // namespace
1844 
1845 /// Try to promote memory values to scalars by sinking stores out of the
1846 /// loop and moving loads to before the loop. We do this by looping over
1847 /// the stores in the loop, looking for stores to Must pointers which are
1848 /// loop invariant.
1849 ///
1851  const SmallSetVector<Value *, 8> &PointerMustAliases,
1852  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1853  SmallVectorImpl<Instruction *> &InsertPts,
1855  LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1856  Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
1857  ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
1858  // Verify inputs.
1859  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1860  CurAST != nullptr && SafetyInfo != nullptr &&
1861  "Unexpected Input to promoteLoopAccessesToScalars");
1862 
1863  Value *SomePtr = *PointerMustAliases.begin();
1864  BasicBlock *Preheader = CurLoop->getLoopPreheader();
1865 
1866  // It is not safe to promote a load/store from the loop if the load/store is
1867  // conditional. For example, turning:
1868  //
1869  // for () { if (c) *P += 1; }
1870  //
1871  // into:
1872  //
1873  // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
1874  //
1875  // is not safe, because *P may only be valid to access if 'c' is true.
1876  //
1877  // The safety property divides into two parts:
1878  // p1) The memory may not be dereferenceable on entry to the loop. In this
1879  // case, we can't insert the required load in the preheader.
1880  // p2) The memory model does not allow us to insert a store along any dynamic
1881  // path which did not originally have one.
1882  //
1883  // If at least one store is guaranteed to execute, both properties are
1884  // satisfied, and promotion is legal.
1885  //
1886  // This, however, is not a necessary condition. Even if no store/load is
1887  // guaranteed to execute, we can still establish these properties.
1888  // We can establish (p1) by proving that hoisting the load into the preheader
1889  // is safe (i.e. proving dereferenceability on all paths through the loop). We
1890  // can use any access within the alias set to prove dereferenceability,
1891  // since they're all must alias.
1892  //
1893  // There are two ways establish (p2):
1894  // a) Prove the location is thread-local. In this case the memory model
1895  // requirement does not apply, and stores are safe to insert.
1896  // b) Prove a store dominates every exit block. In this case, if an exit
1897  // blocks is reached, the original dynamic path would have taken us through
1898  // the store, so inserting a store into the exit block is safe. Note that this
1899  // is different from the store being guaranteed to execute. For instance,
1900  // if an exception is thrown on the first iteration of the loop, the original
1901  // store is never executed, but the exit blocks are not executed either.
1902 
1903  bool DereferenceableInPH = false;
1904  bool SafeToInsertStore = false;
1905 
1907 
1908  // We start with an alignment of one and try to find instructions that allow
1909  // us to prove better alignment.
1910  unsigned Alignment = 1;
1911  // Keep track of which types of access we see
1912  bool SawUnorderedAtomic = false;
1913  bool SawNotAtomic = false;
1914  AAMDNodes AATags;
1915 
1916  const DataLayout &MDL = Preheader->getModule()->getDataLayout();
1917 
1918  bool IsKnownThreadLocalObject = false;
1919  if (SafetyInfo->anyBlockMayThrow()) {
1920  // If a loop can throw, we have to insert a store along each unwind edge.
1921  // That said, we can't actually make the unwind edge explicit. Therefore,
1922  // we have to prove that the store is dead along the unwind edge. We do
1923  // this by proving that the caller can't have a reference to the object
1924  // after return and thus can't possibly load from the object.
1925  Value *Object = GetUnderlyingObject(SomePtr, MDL);
1926  if (!isKnownNonEscaping(Object, TLI))
1927  return false;
1928  // Subtlety: Alloca's aren't visible to callers, but *are* potentially
1929  // visible to other threads if captured and used during their lifetimes.
1930  IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
1931  }
1932 
1933  // Check that all of the pointers in the alias set have the same type. We
1934  // cannot (yet) promote a memory location that is loaded and stored in
1935  // different sizes. While we are at it, collect alignment and AA info.
1936  for (Value *ASIV : PointerMustAliases) {
1937  // Check that all of the pointers in the alias set have the same type. We
1938  // cannot (yet) promote a memory location that is loaded and stored in
1939  // different sizes.
1940  if (SomePtr->getType() != ASIV->getType())
1941  return false;
1942 
1943  for (User *U : ASIV->users()) {
1944  // Ignore instructions that are outside the loop.
1945  Instruction *UI = dyn_cast<Instruction>(U);
1946  if (!UI || !CurLoop->contains(UI))
1947  continue;
1948 
1949  // If there is an non-load/store instruction in the loop, we can't promote
1950  // it.
1951  if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
1952  if (!Load->isUnordered())
1953  return false;
1954 
1955  SawUnorderedAtomic |= Load->isAtomic();
1956  SawNotAtomic |= !Load->isAtomic();
1957 
1958  unsigned InstAlignment = Load->getAlignment();
1959  if (!InstAlignment)
1960  InstAlignment =
1961  MDL.getABITypeAlignment(Load->getType());
1962 
1963  // Note that proving a load safe to speculate requires proving
1964  // sufficient alignment at the target location. Proving it guaranteed
1965  // to execute does as well. Thus we can increase our guaranteed
1966  // alignment as well.
1967  if (!DereferenceableInPH || (InstAlignment > Alignment))
1968  if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
1969  ORE, Preheader->getTerminator())) {
1970  DereferenceableInPH = true;
1971  Alignment = std::max(Alignment, InstAlignment);
1972  }
1973  } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
1974  // Stores *of* the pointer are not interesting, only stores *to* the
1975  // pointer.
1976  if (UI->getOperand(1) != ASIV)
1977  continue;
1978  if (!Store->isUnordered())
1979  return false;
1980 
1981  SawUnorderedAtomic |= Store->isAtomic();
1982  SawNotAtomic |= !Store->isAtomic();
1983 
1984  // If the store is guaranteed to execute, both properties are satisfied.
1985  // We may want to check if a store is guaranteed to execute even if we
1986  // already know that promotion is safe, since it may have higher
1987  // alignment than any other guaranteed stores, in which case we can
1988  // raise the alignment on the promoted store.
1989  unsigned InstAlignment = Store->getAlignment();
1990  if (!InstAlignment)
1991  InstAlignment =
1992  MDL.getABITypeAlignment(Store->getValueOperand()->getType());
1993 
1994  if (!DereferenceableInPH || !SafeToInsertStore ||
1995  (InstAlignment > Alignment)) {
1996  if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
1997  DereferenceableInPH = true;
1998  SafeToInsertStore = true;
1999  Alignment = std::max(Alignment, InstAlignment);
2000  }
2001  }
2002 
2003  // If a store dominates all exit blocks, it is safe to sink.
2004  // As explained above, if an exit block was executed, a dominating
2005  // store must have been executed at least once, so we are not
2006  // introducing stores on paths that did not have them.
2007  // Note that this only looks at explicit exit blocks. If we ever
2008  // start sinking stores into unwind edges (see above), this will break.
2009  if (!SafeToInsertStore)
2010  SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2011  return DT->dominates(Store->getParent(), Exit);
2012  });
2013 
2014  // If the store is not guaranteed to execute, we may still get
2015  // deref info through it.
2016  if (!DereferenceableInPH) {
2017  DereferenceableInPH = isDereferenceableAndAlignedPointer(
2018  Store->getPointerOperand(), Store->getValueOperand()->getType(),
2019  Store->getAlignment(), MDL, Preheader->getTerminator(), DT);
2020  }
2021  } else
2022  return false; // Not a load or store.
2023 
2024  // Merge the AA tags.
2025  if (LoopUses.empty()) {
2026  // On the first load/store, just take its AA tags.
2027  UI->getAAMetadata(AATags);
2028  } else if (AATags) {
2029  UI->getAAMetadata(AATags, /* Merge = */ true);
2030  }
2031 
2032  LoopUses.push_back(UI);
2033  }
2034  }
2035 
2036  // If we found both an unordered atomic instruction and a non-atomic memory
2037  // access, bail. We can't blindly promote non-atomic to atomic since we
2038  // might not be able to lower the result. We can't downgrade since that
2039  // would violate memory model. Also, align 0 is an error for atomics.
2040  if (SawUnorderedAtomic && SawNotAtomic)
2041  return false;
2042 
2043  // If we're inserting an atomic load in the preheader, we must be able to
2044  // lower it. We're only guaranteed to be able to lower naturally aligned
2045  // atomics.
2046  auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
2047  if (SawUnorderedAtomic &&
2048  Alignment < MDL.getTypeStoreSize(SomePtrElemType))
2049  return false;
2050 
2051  // If we couldn't prove we can hoist the load, bail.
2052  if (!DereferenceableInPH)
2053  return false;
2054 
2055  // We know we can hoist the load, but don't have a guaranteed store.
2056  // Check whether the location is thread-local. If it is, then we can insert
2057  // stores along paths which originally didn't have them without violating the
2058  // memory model.
2059  if (!SafeToInsertStore) {
2060  if (IsKnownThreadLocalObject)
2061  SafeToInsertStore = true;
2062  else {
2063  Value *Object = GetUnderlyingObject(SomePtr, MDL);
2064  SafeToInsertStore =
2065  (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
2066  !PointerMayBeCaptured(Object, true, true);
2067  }
2068  }
2069 
2070  // If we've still failed to prove we can sink the store, give up.
2071  if (!SafeToInsertStore)
2072  return false;
2073 
2074  // Otherwise, this is safe to promote, lets do it!
2075  LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
2076  << '\n');
2077  ORE->emit([&]() {
2078  return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2079  LoopUses[0])
2080  << "Moving accesses to memory location out of the loop";
2081  });
2082  ++NumPromoted;
2083 
2084  // Grab a debug location for the inserted loads/stores; given that the
2085  // inserted loads/stores have little relation to the original loads/stores,
2086  // this code just arbitrarily picks a location from one, since any debug
2087  // location is better than none.
2088  DebugLoc DL = LoopUses[0]->getDebugLoc();
2089 
2090  // We use the SSAUpdater interface to insert phi nodes as required.
2092  SSAUpdater SSA(&NewPHIs);
2093  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
2094  InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
2095  Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
2096 
2097  // Set up the preheader to have a definition of the value. It is the live-out
2098  // value from the preheader that uses in the loop will use.
2099  LoadInst *PreheaderLoad = new LoadInst(
2100  SomePtr->getType()->getPointerElementType(), SomePtr,
2101  SomePtr->getName() + ".promoted", Preheader->getTerminator());
2102  if (SawUnorderedAtomic)
2103  PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2104  PreheaderLoad->setAlignment(Alignment);
2105  PreheaderLoad->setDebugLoc(DL);
2106  if (AATags)
2107  PreheaderLoad->setAAMetadata(AATags);
2108  SSA.AddAvailableValue(Preheader, PreheaderLoad);
2109 
2110  MemoryAccess *PreheaderLoadMemoryAccess;
2111  if (MSSAU) {
2112  PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
2113  PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2114  MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2115  MSSAU->insertUse(NewMemUse);
2116  }
2117 
2118  // Rewrite all the loads in the loop and remember all the definitions from
2119  // stores in the loop.
2120  Promoter.run(LoopUses);
2121 
2122  if (MSSAU && VerifyMemorySSA)
2123  MSSAU->getMemorySSA()->verifyMemorySSA();
2124  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2125  if (PreheaderLoad->use_empty())
2126  eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
2127 
2128  return true;
2129 }
2130 
2131 /// Returns an owning pointer to an alias set which incorporates aliasing info
2132 /// from L and all subloops of L.
2133 /// FIXME: In new pass manager, there is no helper function to handle loop
2134 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
2135 /// from scratch for every loop. Hook up with the helper functions when
2136 /// available in the new pass manager to avoid redundant computation.
2137 std::unique_ptr<AliasSetTracker>
2138 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
2139  AliasAnalysis *AA) {
2140  std::unique_ptr<AliasSetTracker> CurAST;
2141  SmallVector<Loop *, 4> RecomputeLoops;
2142  for (Loop *InnerL : L->getSubLoops()) {
2143  auto MapI = LoopToAliasSetMap.find(InnerL);
2144  // If the AST for this inner loop is missing it may have been merged into
2145  // some other loop's AST and then that loop unrolled, and so we need to
2146  // recompute it.
2147  if (MapI == LoopToAliasSetMap.end()) {
2148  RecomputeLoops.push_back(InnerL);
2149  continue;
2150  }
2151  std::unique_ptr<AliasSetTracker> InnerAST = std::move(MapI->second);
2152 
2153  if (CurAST) {
2154  // What if InnerLoop was modified by other passes ?
2155  // Once we've incorporated the inner loop's AST into ours, we don't need
2156  // the subloop's anymore.
2157  CurAST->add(*InnerAST);
2158  } else {
2159  CurAST = std::move(InnerAST);
2160  }
2161  LoopToAliasSetMap.erase(MapI);
2162  }
2163  if (!CurAST)
2164  CurAST = make_unique<AliasSetTracker>(*AA);
2165 
2166  // Add everything from the sub loops that are no longer directly available.
2167  for (Loop *InnerL : RecomputeLoops)
2168  for (BasicBlock *BB : InnerL->blocks())
2169  CurAST->add(*BB);
2170 
2171  // And merge in this loop (without anything from inner loops).
2172  for (BasicBlock *BB : L->blocks())
2173  if (LI->getLoopFor(BB) == L)
2174  CurAST->add(*BB);
2175 
2176  return CurAST;
2177 }
2178 
2179 std::unique_ptr<AliasSetTracker>
2180 LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
2181  Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
2182  auto *MSSA = MSSAU->getMemorySSA();
2183  auto CurAST = make_unique<AliasSetTracker>(*AA, MSSA, L);
2184  CurAST->addAllInstructionsInLoopUsingMSSA();
2185  return CurAST;
2186 }
2187 
2188 /// Simple analysis hook. Clone alias set info.
2189 ///
2190 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
2191  Loop *L) {
2192  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2193  if (ASTIt == LICM.getLoopToAliasSetMap().end())
2194  return;
2195 
2196  ASTIt->second->copyValue(From, To);
2197 }
2198 
2199 /// Simple Analysis hook. Delete value V from alias set
2200 ///
2201 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
2202  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2203  if (ASTIt == LICM.getLoopToAliasSetMap().end())
2204  return;
2205 
2206  ASTIt->second->deleteValue(V);
2207 }
2208 
2209 /// Simple Analysis hook. Delete value L from alias set map.
2210 ///
2211 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
2212  if (!LICM.getLoopToAliasSetMap().count(L))
2213  return;
2214 
2215  LICM.getLoopToAliasSetMap().erase(L);
2216 }
2217 
2219  AliasSetTracker *CurAST, Loop *CurLoop,
2220  AliasAnalysis *AA) {
2221  // First check to see if any of the basic blocks in CurLoop invalidate *V.
2222  bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
2223 
2224  if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
2225  return isInvalidatedAccordingToAST;
2226 
2227  // Check with a diagnostic analysis if we can refine the information above.
2228  // This is to identify the limitations of using the AST.
2229  // The alias set mechanism used by LICM has a major weakness in that it
2230  // combines all things which may alias into a single set *before* asking
2231  // modref questions. As a result, a single readonly call within a loop will
2232  // collapse all loads and stores into a single alias set and report
2233  // invalidation if the loop contains any store. For example, readonly calls
2234  // with deopt states have this form and create a general alias set with all
2235  // loads and stores. In order to get any LICM in loops containing possible
2236  // deopt states we need a more precise invalidation of checking the mod ref
2237  // info of each instruction within the loop and LI. This has a complexity of
2238  // O(N^2), so currently, it is used only as a diagnostic tool since the
2239  // default value of LICMN2Threshold is zero.
2240 
2241  // Don't look at nested loops.
2242  if (CurLoop->begin() != CurLoop->end())
2243  return true;
2244 
2245  int N = 0;
2246  for (BasicBlock *BB : CurLoop->getBlocks())
2247  for (Instruction &I : *BB) {
2248  if (N >= LICMN2Theshold) {
2249  LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
2250  << *(MemLoc.Ptr) << "\n");
2251  return true;
2252  }
2253  N++;
2254  auto Res = AA->getModRefInfo(&I, MemLoc);
2255  if (isModSet(Res)) {
2256  LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
2257  << *(MemLoc.Ptr) << "\n");
2258  return true;
2259  }
2260  }
2261  LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
2262  return false;
2263 }
2264 
2266  Loop *CurLoop,
2267  SinkAndHoistLICMFlags &Flags) {
2268  // For hoisting, use the walker to determine safety
2269  if (!Flags.IsSink) {
2271  // See declaration of SetLicmMssaOptCap for usage details.
2272  if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
2273  Source = MU->getDefiningAccess();
2274  else {
2275  Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
2276  Flags.LicmMssaOptCounter++;
2277  }
2278  return !MSSA->isLiveOnEntryDef(Source) &&
2279  CurLoop->contains(Source->getBlock());
2280  }
2281 
2282  // For sinking, we'd need to check all Defs below this use. The getClobbering
2283  // call will look on the backedge of the loop, but will check aliasing with
2284  // the instructions on the previous iteration.
2285  // For example:
2286  // for (i ... )
2287  // load a[i] ( Use (LoE)
2288  // store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2289  // i++;
2290  // The load sees no clobbering inside the loop, as the backedge alias check
2291  // does phi translation, and will check aliasing against store a[i-1].
2292  // However sinking the load outside the loop, below the store is incorrect.
2293 
2294  // For now, only sink if there are no Defs in the loop, and the existing ones
2295  // precede the use and are in the same block.
2296  // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2297  // needs PostDominatorTreeAnalysis.
2298  // FIXME: More precise: no Defs that alias this Use.
2299  if (Flags.NoOfMemAccTooLarge)
2300  return true;
2301  for (auto *BB : CurLoop->getBlocks())
2302  if (auto *Accesses = MSSA->getBlockDefs(BB))
2303  for (const auto &MA : *Accesses)
2304  if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2305  if (MU->getBlock() != MD->getBlock() ||
2306  !MSSA->locallyDominates(MD, MU))
2307  return true;
2308  return false;
2309 }
2310 
2311 /// Little predicate that returns true if the specified basic block is in
2312 /// a subloop of the current one, not the current one itself.
2313 ///
2314 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2315  assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2316  return LI->getLoopFor(BB) != CurLoop;
2317 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, AliasSetTracker *AST, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1415
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
bool canSplitPredecessors() const
Definition: BasicBlock.cpp:371
void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB)
Inform the safety info that we are planning to insert a new instruction Inst into the basic block BB...
Definition: MustExecute.cpp:85
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
void ReplaceInstWithInst(BasicBlock::InstListType &BIL, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool promoteLoopAccessesToScalars(const SmallSetVector< Value *, 8 > &, SmallVectorImpl< BasicBlock *> &, SmallVectorImpl< Instruction *> &, SmallVectorImpl< MemoryAccess *> &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *)
Try to promote memory values to scalars by sinking stores out of the loop and moving loads to before ...
Definition: LICM.cpp:1850
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I)
Returns true if a PHINode is a trivially replaceable with an Instruction.
Definition: LICM.cpp:1271
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:38
Diagnostic information for missed-optimization remarks.
DILocation * get() const
Get the underlying DILocation.
Definition: DebugLoc.cpp:21
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * getPointerOperand(Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
DiagnosticInfoOptimizationBase::Argument NV
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:82
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
Definition: MemorySSA.cpp:2082
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:257
This is the interface for a simple mod/ref and alias analysis over globals.
const_iterator end() const
EltTy front() const
bool hasMetadataOtherThanDebugLoc() const
Return true if this instruction has metadata attached to it other than a debug location.
Definition: Instruction.h:228
void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs)
Drop all unknown metadata except for debug locations.
Definition: Metadata.cpp:1195
iterator end()
Definition: Function.h:682
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
static constexpr LocationSize unknown()
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block...
Definition: LICM.cpp:487
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
Definition: LoopInfoImpl.h:85
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:409
const AccessList * getBlockAccesses(const BasicBlock *BB) const
Return the list of MemoryAccess&#39;s for a given basic block.
Definition: MemorySSA.h:758
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value...
Definition: SSAUpdater.cpp:71
The main scalar evolution driver.
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, LoopInfo *LI, const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1473
This class represents a function call, abstracting a target machine&#39;s calling convention.
This file contains the declarations for metadata subclasses.
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:253
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:632
bool salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1630
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:732
void insertUse(MemoryUse *Use)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1192
BasicBlock * getSuccessor(unsigned i) const
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI)
Return true if the instruction is free in the loop.
Definition: LICM.cpp:1280
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
An instruction for reading from memory.
Definition: Instructions.h:167
Hexagon Common GEP
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
iv Induction Variable Users
Definition: IVUsers.cpp:51
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block...
Definition: LICM.cpp:788
void reserve(size_type N)
Definition: SmallVector.h:369
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:299
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, unsigned Align, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested...
Definition: Loads.cpp:128
op_iterator op_begin()
Definition: User.h:229
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:67
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:683
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AliasAnalysis *AA)
Definition: LICM.cpp:2218
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
Loop Invariant Code Motion
Definition: LICM.cpp:310
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1241
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:385
Represents read-only accesses to memory.
Definition: MemorySSA.h:319
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:133
ArrayRef< BasicBlock * > get(BasicBlock *BB)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:963
virtual void computeLoopSafetyInfo(const Loop *CurLoop)
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:71
This is the interface for a SCEV-based alias analysis.
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Type * getPointerElementType() const
Definition: Type.h:376
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:897
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:123
bool hasAllowReciprocal() const
Determine whether the allow-reciprocal flag is set.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
virtual bool doFinalization(Module &)
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
Definition: Pass.h:109
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:703
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
Definition: InstrTypes.h:1028
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef&#39;s and MemoryPhi&#39;s for a given basic block.
Definition: MemorySSA.h:766
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1152
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
virtual bool anyBlockMayThrow() const
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:67
BlockT * getHeader() const
Definition: LoopInfo.h:102
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
FunctionModRefBehavior getModRefBehavior(const CallBase *Call)
Return the behavior of the given call site.
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:273
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, Loop *CurLoop, SinkAndHoistLICMFlags &Flags)
Definition: LICM.cpp:2265
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_END(LegacyLICMPass
Instruction * clone() const
Create a copy of &#39;this&#39; instruction that is identical in all ways except the following: ...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref&#39;ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:720
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:237
PredIteratorCache - This class is an extremely trivial cache for predecessor iterator queries...
FunctionModRefBehavior
Summary of how a function affects memory in the program.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:268
Instruction * getUniqueInstruction()
If this alias set is known to contain a single instruction and only a single unique instruction...
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
This class represents a no-op cast from one type to another.
Memory SSA
Definition: MemorySSA.cpp:64
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:320
static cl::opt< bool > DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), cl::desc("Disable memory promotion in LICM pass"))
Memory promotion is enabled by default.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
void copyColors(BasicBlock *New, BasicBlock *Old)
Copy colors of block Old into the block New.
Definition: MustExecute.cpp:29
iterator begin()
Definition: Function.h:680
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Value * getOperand(unsigned i) const
Definition: User.h:169
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
void forgetLoopDispositions(const Loop *L)
Called when the client has changed the disposition of values in this loop.
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:105
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal=false)
Checks whether the given location points to constant memory, or if OrLocal is true whether it points ...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:875
void getAAMetadata(AAMDNodes &N, bool Merge=false) const
Fills the AAMDNodes structure with AA metadata from this instruction.
NodeT * getBlock() const
static Instruction * sinkThroughTriviallyReplaceablePHI(PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap< BasicBlock *, Instruction *, 32 > &SunkCopies, const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1437
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
static cl::opt< int > LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), cl::desc("How many instruction to cross product using AA"))
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:189
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:216
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const =0
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:318
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1564
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:39
void setAAMetadata(const AAMDNodes &N)
Sets the metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1261
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
size_t size(BasicBlock *BB) const
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:280
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:572
Expected to fold away in lowering.
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
Diagnostic information for applied optimization remarks.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
Pass * createLICMPass()
Definition: LICM.cpp:313
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:231
cl::opt< unsigned > SetLicmMssaOptCap
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1199
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LICM.cpp:276
AliasSet & getAliasSetFor(const MemoryLocation &MemLoc)
Return the alias set which contains the specified memory location.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:99
static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo)
Definition: LICM.cpp:1455
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1220
bool isMod() const
void setAlignment(unsigned Align)
bool onlyReadsMemory(const CallBase *Call)
Checks if the specified call is known to only read from non-volatile memory (or not access memory at ...
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1436
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, Loop *CurLoop)
Definition: LICM.cpp:974
size_t size() const
Definition: SmallVector.h:52
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
This function does not perform any non-local loads or stores to memory.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
static cl::opt< uint32_t > MaxNumUsesTraversed("licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)"))
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:50
const Value * Ptr
The address of the start of the location.
Representation for a specific memory location.
size_type size() const
Definition: SmallPtrSet.h:92
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:333
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:112
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only use loop invariant operands that is safe to hoist, this instruction is called to do the dirty work.
Definition: LICM.cpp:1646
Iterator for intrusive lists based on ilist_node.
void removeInstruction(const Instruction *Inst)
Inform safety info that we are planning to remove the instruction Inst from its block.
Definition: MustExecute.cpp:91
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop) const
Returns true if we could not execute a memory-modifying instruction before we enter BB under assumpti...
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
#define DEBUG_TYPE
Definition: LICM.cpp:79
BlockVerifier::State From
static cl::opt< bool > ControlFlowHoisting("licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM"))
void verifyMemorySSA() const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1848
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
iterator end()
Definition: BasicBlock.h:270
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1160
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater...
Definition: SSAUpdater.h:136
void copyValue(Value *From, Value *To)
This method should be used whenever a preexisting value in the program is copied or cloned...
bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in the same basic block, determine whether MemoryAccess A dominates MemoryA...
Definition: MemorySSA.cpp:2051
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Provides information about what library functions are available for the current target.
iterator begin() const
Definition: LoopInfo.h:144
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:746
void wireOldPredecessorsToNewImmediatePredecessor(BasicBlock *Old, BasicBlock *New, ArrayRef< BasicBlock *> Preds, bool IdenticalEdgesWereMerged=true)
A new empty BasicBlock (New) now branches directly to Old.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:643
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:924
BasicBlock * getBlock() const
Definition: MemorySSA.h:159
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isConditional() const
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop)
Return true if the only users of this instruction are outside of the loop.
Definition: LICM.cpp:1308
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:124
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:706
unsigned getNumIncomingValues() const
Return the number of incoming edges.
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental.guard intrinsic.
Definition: GuardUtils.cpp:17
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
Class that has the common methods + fields of memory uses/defs.
Definition: MemorySSA.h:247
A lightweight accessor for an operand bundle meant to be passed around by value.
Definition: InstrTypes.h:1000
iterator_range< user_iterator > users()
Definition: Value.h:399
static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI=nullptr)
Only sink or hoist an instruction if it is not a trapping instruction, or if the instruction is known...
Definition: LICM.cpp:1689
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:601
LoopT * getParentLoop() const
Definition: LoopInfo.h:103
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:133
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:378
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB)
Checks if functions with the specified behavior are known to read and write at most from objects poin...
iterator insert(iterator where, pointer New)
Definition: ilist.h:226
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:321
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
iterator begin() const
Definition: SmallPtrSet.h:396
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
cl::opt< bool > EnableMSSALoopDependency
Enables memory ssa as a dependency for loop passes.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:506
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:151
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
unsigned pred_size(const BasicBlock *BB)
Get the number of predecessors of BB.
Definition: CFG.h:121
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
user_iterator_impl< User > user_iterator
Definition: Value.h:368
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc...
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:136
licm
Definition: LICM.cpp:310
MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
iterator end() const
Definition: LoopInfo.h:145
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:738
Captures loop safety information.
Definition: MustExecute.h:47
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:171
const_iterator begin() const
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:432
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:290
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef&#39;ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1023
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI)
Little predicate that returns true if the specified basic block is in a subloop of the current one...
Definition: LICM.cpp:2314
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:375
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:359
LLVM Value Representation.
Definition: Value.h:72
void setAlignment(unsigned Align)
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:444
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
void initializeLegacyLICMPassPass(PassRegistry &)
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:583
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, SinkAndHoistLICMFlags *LICMFlags=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Returns true if is legal to hoist or sink this instruction disregarding the possible introduction of ...
Definition: LICM.cpp:1072
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:1177
void getUniqueExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:115
This is the interface for LLVM&#39;s primary stateless and local alias analysis.
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition: LICM.cpp:1545
A container for analyses that lazily runs them and caches their results.
static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1340
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
op_range incoming_values()
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:158
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:173
The optimization diagnostic interface.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:120
bool use_empty() const
Definition: Value.h:322
unsigned size() const
void deleteValue(Value *PtrVal)
This method is used to remove a pointer value from the AliasSetTracker entirely.
loop versioning Loop Versioning For LICM
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
const BasicBlock * getParent() const
Definition: Instruction.h:66
static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1425
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=DefaultMaxUsesToExplore)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
user_iterator user_end()
Definition: Value.h:383
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition: Instruction.h:532
const DenseMap< BasicBlock *, ColorVector > & getBlockColors() const
Returns block colors map that is used to update funclet operand bundles.
Definition: MustExecute.cpp:25