LLVM  14.0.0git
LICM.cpp
Go to the documentation of this file.
1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible. It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe. This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // Hoisting operations out of loops is a canonicalization transform. It
16 // enables and simplifies subsequent optimizations in the middle-end.
17 // Rematerialization of hoisted instructions to reduce register pressure is the
18 // responsibility of the back-end, which has more accurate information about
19 // register pressure and also handles other optimizations than LICM that
20 // increase live-ranges.
21 //
22 // This pass uses alias analysis for two purposes:
23 //
24 // 1. Moving loop invariant loads and calls out of loops. If we can determine
25 // that a load or call inside of a loop never aliases anything stored to,
26 // we can hoist it or sink it like any other instruction.
27 // 2. Scalar Promotion of Memory - If there is a store instruction inside of
28 // the loop, we try to move the store to happen AFTER the loop instead of
29 // inside of the loop. This can only happen if a few conditions are true:
30 // A. The pointer stored through is loop invariant
31 // B. There are no stores or loads in the loop which _may_ alias the
32 // pointer. There are no calls in the loop which mod/ref the pointer.
33 // If these conditions are true, we can promote the loads and stores in the
34 // loop of the pointer to use a temporary alloca'd variable. We then use
35 // the SSAUpdater to construct the appropriate SSA form for the value.
36 //
37 //===----------------------------------------------------------------------===//
38 
40 #include "llvm/ADT/SetOperations.h"
41 #include "llvm/ADT/Statistic.h"
51 #include "llvm/Analysis/Loads.h"
52 #include "llvm/Analysis/LoopInfo.h"
54 #include "llvm/Analysis/LoopPass.h"
64 #include "llvm/IR/CFG.h"
65 #include "llvm/IR/Constants.h"
66 #include "llvm/IR/DataLayout.h"
68 #include "llvm/IR/DerivedTypes.h"
69 #include "llvm/IR/Dominators.h"
70 #include "llvm/IR/Instructions.h"
71 #include "llvm/IR/IntrinsicInst.h"
72 #include "llvm/IR/LLVMContext.h"
73 #include "llvm/IR/Metadata.h"
74 #include "llvm/IR/PatternMatch.h"
76 #include "llvm/InitializePasses.h"
78 #include "llvm/Support/Debug.h"
80 #include "llvm/Transforms/Scalar.h"
87 #include <algorithm>
88 #include <utility>
89 using namespace llvm;
90 
91 #define DEBUG_TYPE "licm"
92 
93 STATISTIC(NumCreatedBlocks, "Number of blocks created");
94 STATISTIC(NumClonedBranches, "Number of branches cloned");
95 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
96 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
97 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
98 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
99 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
100 
101 /// Memory promotion is enabled by default.
102 static cl::opt<bool>
103  DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
104  cl::desc("Disable memory promotion in LICM pass"));
105 
107  "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
108  cl::desc("Enable control flow (and PHI) hoisting in LICM"));
109 
111  "licm-coldness-threshold", cl::Hidden, cl::init(4),
112  cl::desc("Relative coldness Threshold of hoisting/sinking destination "
113  "block for LICM to be considered beneficial"));
114 
116  "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
117  cl::desc("Max num uses visited for identifying load "
118  "invariance in loop using invariant start (default = 8)"));
119 
120 // Experimental option to allow imprecision in LICM in pathological cases, in
121 // exchange for faster compile. This is to be removed if MemorySSA starts to
122 // address the same issue. This flag applies only when LICM uses MemorySSA
123 // instead on AliasSetTracker. LICM calls MemorySSAWalker's
124 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
125 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
126 // which may not be precise, since optimizeUses is capped. The result is
127 // correct, but we may not get as "far up" as possible to get which access is
128 // clobbering the one queried.
130  "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
131  cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
132  "for faster compile. Caps the MemorySSA clobbering calls."));
133 
134 // Experimentally, memory promotion carries less importance than sinking and
135 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
136 // compile time.
138  "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
139  cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
140  "effect. When MSSA in LICM is enabled, then this is the maximum "
141  "number of accesses allowed to be present in a loop in order to "
142  "enable memory promotion."));
143 
144 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
145 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
146  const LoopSafetyInfo *SafetyInfo,
147  TargetTransformInfo *TTI, bool &FreeInLoop,
148  bool LoopNestMode);
149 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
150  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
151  MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
153 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
154  BlockFrequencyInfo *BFI, const Loop *CurLoop,
155  ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
158  const DominatorTree *DT,
159  const TargetLibraryInfo *TLI,
160  const Loop *CurLoop,
161  const LoopSafetyInfo *SafetyInfo,
163  const Instruction *CtxI = nullptr);
164 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
165  AliasSetTracker *CurAST, Loop *CurLoop,
166  AAResults *AA);
168  Loop *CurLoop, Instruction &I,
169  SinkAndHoistLICMFlags &Flags);
171  MemoryUse &MU);
173  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
174  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
175 
176 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
177  MemorySSAUpdater *MSSAU);
178 
179 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
180  ICFLoopSafetyInfo &SafetyInfo,
181  MemorySSAUpdater *MSSAU, ScalarEvolution *SE);
182 
183 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
184  function_ref<void(Instruction *)> Fn);
187 
188 namespace {
189 struct LoopInvariantCodeMotion {
190  bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
193  OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
194 
195  LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
196  unsigned LicmMssaNoAccForPromotionCap)
197  : LicmMssaOptCap(LicmMssaOptCap),
198  LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
199 
200 private:
201  unsigned LicmMssaOptCap;
202  unsigned LicmMssaNoAccForPromotionCap;
203 };
204 
205 struct LegacyLICMPass : public LoopPass {
206  static char ID; // Pass identification, replacement for typeid
207  LegacyLICMPass(
208  unsigned LicmMssaOptCap = SetLicmMssaOptCap,
209  unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
210  : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
212  }
213 
214  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
215  if (skipLoop(L))
216  return false;
217 
218  LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
219  << L->getHeader()->getNameOrAsOperand() << "\n");
220 
221  auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
222  MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
225  hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
226  : nullptr;
227  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
228  // pass. Function analyses need to be preserved across loop transformations
229  // but ORE cannot be preserved (see comment before the pass definition).
231  return LICM.runOnLoop(
232  L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
233  &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
234  &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI,
235  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
236  *L->getHeader()->getParent()),
237  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
238  *L->getHeader()->getParent()),
239  SE ? &SE->getSE() : nullptr, MSSA, &ORE);
240  }
241 
242  /// This transformation requires natural loop information & requires that
243  /// loop preheaders be inserted into the CFG...
244  ///
245  void getAnalysisUsage(AnalysisUsage &AU) const override {
256  }
257 
258 private:
259  LoopInvariantCodeMotion LICM;
260 };
261 } // namespace
262 
265  if (!AR.MSSA)
266  report_fatal_error("LICM requires MemorySSA (loop-mssa)");
267 
268  // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
269  // pass. Function analyses need to be preserved across loop transformations
270  // but ORE cannot be preserved (see comment before the pass definition).
272 
273  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
274  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
275  &AR.SE, AR.MSSA, &ORE))
276  return PreservedAnalyses::all();
277 
278  auto PA = getLoopPassPreservedAnalyses();
279 
280  PA.preserve<DominatorTreeAnalysis>();
281  PA.preserve<LoopAnalysis>();
282  PA.preserve<MemorySSAAnalysis>();
283 
284  return PA;
285 }
286 
289  LPMUpdater &) {
290  if (!AR.MSSA)
291  report_fatal_error("LNICM requires MemorySSA (loop-mssa)");
292 
293  // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
294  // pass. Function analyses need to be preserved across loop transformations
295  // but ORE cannot be preserved (see comment before the pass definition).
297 
298  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
299 
300  Loop &OutermostLoop = LN.getOutermostLoop();
301  bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
302  &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true);
303 
304  if (!Changed)
305  return PreservedAnalyses::all();
306 
307  auto PA = getLoopPassPreservedAnalyses();
308 
309  PA.preserve<DominatorTreeAnalysis>();
310  PA.preserve<LoopAnalysis>();
311  PA.preserve<MemorySSAAnalysis>();
312 
313  return PA;
314 }
315 
316 char LegacyLICMPass::ID = 0;
317 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
318  false, false)
323 INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
324 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
325  false)
326 
327 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
328 Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
329  unsigned LicmMssaNoAccForPromotionCap) {
330  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
331 }
332 
334  MemorySSA *MSSA)
336  IsSink, L, MSSA) {}
337 
339  unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
340  Loop *L, MemorySSA *MSSA)
341  : LicmMssaOptCap(LicmMssaOptCap),
342  LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
343  IsSink(IsSink) {
344  assert(((L != nullptr) == (MSSA != nullptr)) &&
345  "Unexpected values for SinkAndHoistLICMFlags");
346  if (!MSSA)
347  return;
348 
349  unsigned AccessCapCount = 0;
350  for (auto *BB : L->getBlocks())
351  if (const auto *Accesses = MSSA->getBlockAccesses(BB))
352  for (const auto &MA : *Accesses) {
353  (void)MA;
354  ++AccessCapCount;
355  if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
356  NoOfMemAccTooLarge = true;
357  return;
358  }
359  }
360 }
361 
362 /// Hoist expressions out of the specified loop. Note, alias info for inner
363 /// loop is not preserved so it is not a good idea to run LICM multiple
364 /// times on one loop.
365 bool LoopInvariantCodeMotion::runOnLoop(
366  Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
369  bool LoopNestMode) {
370  bool Changed = false;
371 
372  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
373 
374  // If this loop has metadata indicating that LICM is not to be performed then
375  // just exit.
377  return false;
378  }
379 
380  // Don't sink stores from loops with coroutine suspend instructions.
381  // LICM would sink instructions into the default destination of
382  // the coroutine switch. The default destination of the switch is to
383  // handle the case where the coroutine is suspended, by which point the
384  // coroutine frame may have been destroyed. No instruction can be sunk there.
385  // FIXME: This would unfortunately hurt the performance of coroutines, however
386  // there is currently no general solution for this. Similar issues could also
387  // potentially happen in other passes where instructions are being moved
388  // across that edge.
389  bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
390  return llvm::any_of(*BB, [](Instruction &I) {
391  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
392  return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
393  });
394  });
395 
396  MemorySSAUpdater MSSAU(MSSA);
397  SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
398  /*IsSink=*/true, L, MSSA);
399 
400  // Get the preheader block to move instructions into...
401  BasicBlock *Preheader = L->getLoopPreheader();
402 
403  // Compute loop safety information.
404  ICFLoopSafetyInfo SafetyInfo;
405  SafetyInfo.computeLoopSafetyInfo(L);
406 
407  // We want to visit all of the instructions in this loop... that are not parts
408  // of our subloops (they have already had their invariants hoisted out of
409  // their loop, into this loop, so there is no need to process the BODIES of
410  // the subloops).
411  //
412  // Traverse the body of the loop in depth first order on the dominator tree so
413  // that we are guaranteed to see definitions before we see uses. This allows
414  // us to sink instructions in one pass, without iteration. After sinking
415  // instructions, we perform another pass to hoist them out of the loop.
416  if (L->hasDedicatedExits())
417  Changed |= LoopNestMode
418  ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI,
419  DT, BFI, TLI, TTI, L, &MSSAU,
420  &SafetyInfo, Flags, ORE)
421  : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI,
422  TLI, TTI, L, &MSSAU, &SafetyInfo, Flags, ORE);
423  Flags.setIsSink(false);
424  if (Preheader)
425  Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
426  &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode);
427 
428  // Now that all loop invariants have been removed from the loop, promote any
429  // memory references to scalars that we can.
430  // Don't sink stores from loops without dedicated block exits. Exits
431  // containing indirect branches are not transformed by loop simplify,
432  // make sure we catch that. An additional load may be generated in the
433  // preheader for SSA updater, so also avoid sinking when no preheader
434  // is available.
435  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
436  !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
437  // Figure out the loop exits and their insertion points
438  SmallVector<BasicBlock *, 8> ExitBlocks;
439  L->getUniqueExitBlocks(ExitBlocks);
440 
441  // We can't insert into a catchswitch.
442  bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
443  return isa<CatchSwitchInst>(Exit->getTerminator());
444  });
445 
446  if (!HasCatchSwitch) {
448  SmallVector<MemoryAccess *, 8> MSSAInsertPts;
449  InsertPts.reserve(ExitBlocks.size());
450  MSSAInsertPts.reserve(ExitBlocks.size());
451  for (BasicBlock *ExitBlock : ExitBlocks) {
452  InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
453  MSSAInsertPts.push_back(nullptr);
454  }
455 
457 
458  // Promoting one set of accesses may make the pointers for another set
459  // loop invariant, so run this in a loop (with the MaybePromotable set
460  // decreasing in size over time).
461  bool Promoted = false;
462  bool LocalPromoted;
463  do {
464  LocalPromoted = false;
465  for (const SmallSetVector<Value *, 8> &PointerMustAliases :
466  collectPromotionCandidates(MSSA, AA, L)) {
467  LocalPromoted |= promoteLoopAccessesToScalars(
468  PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
469  LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE);
470  }
471  Promoted |= LocalPromoted;
472  } while (LocalPromoted);
473 
474  // Once we have promoted values across the loop body we have to
475  // recursively reform LCSSA as any nested loop may now have values defined
476  // within the loop used in the outer loop.
477  // FIXME: This is really heavy handed. It would be a bit better to use an
478  // SSAUpdater strategy during promotion that was LCSSA aware and reformed
479  // it as it went.
480  if (Promoted)
481  formLCSSARecursively(*L, *DT, LI, SE);
482 
483  Changed |= Promoted;
484  }
485  }
486 
487  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
488  // specifically moving instructions across the loop boundary and so it is
489  // especially in need of sanity checking here.
490  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
491  assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
492  "Parent loop not left in LCSSA form after LICM!");
493 
494  if (VerifyMemorySSA)
495  MSSA->verifyMemorySSA();
496 
497  if (Changed && SE)
498  SE->forgetLoopDispositions(L);
499  return Changed;
500 }
501 
502 /// Walk the specified region of the CFG (defined by all blocks dominated by
503 /// the specified block, and that are in the current loop) in reverse depth
504 /// first order w.r.t the DominatorTree. This allows us to visit uses before
505 /// definitions, allowing us to sink a loop body in one pass without iteration.
506 ///
510  Loop *CurLoop, MemorySSAUpdater *MSSAU,
511  ICFLoopSafetyInfo *SafetyInfo,
512  SinkAndHoistLICMFlags &Flags,
513  OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
514 
515  // Verify inputs.
516  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
517  CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
518  "Unexpected input to sinkRegion.");
519 
520  // We want to visit children before parents. We will enque all the parents
521  // before their children in the worklist and process the worklist in reverse
522  // order.
524 
525  bool Changed = false;
526  for (DomTreeNode *DTN : reverse(Worklist)) {
527  BasicBlock *BB = DTN->getBlock();
528  // Only need to process the contents of this block if it is not part of a
529  // subloop (which would already have been processed).
530  if (inSubLoop(BB, CurLoop, LI))
531  continue;
532 
533  for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
534  Instruction &I = *--II;
535 
536  // The instruction is not used in the loop if it is dead. In this case,
537  // we just delete it instead of sinking it.
538  if (isInstructionTriviallyDead(&I, TLI)) {
539  LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
542  ++II;
543  eraseInstruction(I, *SafetyInfo, MSSAU);
544  Changed = true;
545  continue;
546  }
547 
548  // Check to see if we can sink this instruction to the exit blocks
549  // of the loop. We can do this if the all users of the instruction are
550  // outside of the loop. In this case, it doesn't even matter if the
551  // operands of the instruction are loop invariant.
552  //
553  bool FreeInLoop = false;
554  bool LoopNestMode = OutermostLoop != nullptr;
555  if (!I.mayHaveSideEffects() &&
556  isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
557  SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
558  canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/nullptr, MSSAU, true,
559  &Flags, ORE)) {
560  if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
561  if (!FreeInLoop) {
562  ++II;
564  eraseInstruction(I, *SafetyInfo, MSSAU);
565  }
566  Changed = true;
567  }
568  }
569  }
570  }
571  if (VerifyMemorySSA)
572  MSSAU->getMemorySSA()->verifyMemorySSA();
573  return Changed;
574 }
575 
577  DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
579  Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
581 
582  bool Changed = false;
584  Worklist.insert(CurLoop);
585  appendLoopsToWorklist(*CurLoop, Worklist);
586  while (!Worklist.empty()) {
587  Loop *L = Worklist.pop_back_val();
588  Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI,
589  TTI, L, MSSAU, SafetyInfo, Flags, ORE, CurLoop);
590  }
591  return Changed;
592 }
593 
594 namespace {
595 // This is a helper class for hoistRegion to make it able to hoist control flow
596 // in order to be able to hoist phis. The way this works is that we initially
597 // start hoisting to the loop preheader, and when we see a loop invariant branch
598 // we make note of this. When we then come to hoist an instruction that's
599 // conditional on such a branch we duplicate the branch and the relevant control
600 // flow, then hoist the instruction into the block corresponding to its original
601 // block in the duplicated control flow.
602 class ControlFlowHoister {
603 private:
604  // Information about the loop we are hoisting from
605  LoopInfo *LI;
606  DominatorTree *DT;
607  Loop *CurLoop;
608  MemorySSAUpdater *MSSAU;
609 
610  // A map of blocks in the loop to the block their instructions will be hoisted
611  // to.
612  DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
613 
614  // The branches that we can hoist, mapped to the block that marks a
615  // convergence point of their control flow.
616  DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
617 
618 public:
619  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
620  MemorySSAUpdater *MSSAU)
621  : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
622 
623  void registerPossiblyHoistableBranch(BranchInst *BI) {
624  // We can only hoist conditional branches with loop invariant operands.
625  if (!ControlFlowHoisting || !BI->isConditional() ||
626  !CurLoop->hasLoopInvariantOperands(BI))
627  return;
628 
629  // The branch destinations need to be in the loop, and we don't gain
630  // anything by duplicating conditional branches with duplicate successors,
631  // as it's essentially the same as an unconditional branch.
632  BasicBlock *TrueDest = BI->getSuccessor(0);
633  BasicBlock *FalseDest = BI->getSuccessor(1);
634  if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
635  TrueDest == FalseDest)
636  return;
637 
638  // We can hoist BI if one branch destination is the successor of the other,
639  // or both have common successor which we check by seeing if the
640  // intersection of their successors is non-empty.
641  // TODO: This could be expanded to allowing branches where both ends
642  // eventually converge to a single block.
643  SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
644  TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
645  FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
646  BasicBlock *CommonSucc = nullptr;
647  if (TrueDestSucc.count(FalseDest)) {
648  CommonSucc = FalseDest;
649  } else if (FalseDestSucc.count(TrueDest)) {
650  CommonSucc = TrueDest;
651  } else {
652  set_intersect(TrueDestSucc, FalseDestSucc);
653  // If there's one common successor use that.
654  if (TrueDestSucc.size() == 1)
655  CommonSucc = *TrueDestSucc.begin();
656  // If there's more than one pick whichever appears first in the block list
657  // (we can't use the value returned by TrueDestSucc.begin() as it's
658  // unpredicatable which element gets returned).
659  else if (!TrueDestSucc.empty()) {
660  Function *F = TrueDest->getParent();
661  auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
662  auto It = llvm::find_if(*F, IsSucc);
663  assert(It != F->end() && "Could not find successor in function");
664  CommonSucc = &*It;
665  }
666  }
667  // The common successor has to be dominated by the branch, as otherwise
668  // there will be some other path to the successor that will not be
669  // controlled by this branch so any phi we hoist would be controlled by the
670  // wrong condition. This also takes care of avoiding hoisting of loop back
671  // edges.
672  // TODO: In some cases this could be relaxed if the successor is dominated
673  // by another block that's been hoisted and we can guarantee that the
674  // control flow has been replicated exactly.
675  if (CommonSucc && DT->dominates(BI, CommonSucc))
676  HoistableBranches[BI] = CommonSucc;
677  }
678 
679  bool canHoistPHI(PHINode *PN) {
680  // The phi must have loop invariant operands.
681  if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
682  return false;
683  // We can hoist phis if the block they are in is the target of hoistable
684  // branches which cover all of the predecessors of the block.
685  SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
686  BasicBlock *BB = PN->getParent();
687  for (BasicBlock *PredBB : predecessors(BB))
688  PredecessorBlocks.insert(PredBB);
689  // If we have less predecessor blocks than predecessors then the phi will
690  // have more than one incoming value for the same block which we can't
691  // handle.
692  // TODO: This could be handled be erasing some of the duplicate incoming
693  // values.
694  if (PredecessorBlocks.size() != pred_size(BB))
695  return false;
696  for (auto &Pair : HoistableBranches) {
697  if (Pair.second == BB) {
698  // Which blocks are predecessors via this branch depends on if the
699  // branch is triangle-like or diamond-like.
700  if (Pair.first->getSuccessor(0) == BB) {
701  PredecessorBlocks.erase(Pair.first->getParent());
702  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
703  } else if (Pair.first->getSuccessor(1) == BB) {
704  PredecessorBlocks.erase(Pair.first->getParent());
705  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
706  } else {
707  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
708  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
709  }
710  }
711  }
712  // PredecessorBlocks will now be empty if for every predecessor of BB we
713  // found a hoistable branch source.
714  return PredecessorBlocks.empty();
715  }
716 
717  BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
718  if (!ControlFlowHoisting)
719  return CurLoop->getLoopPreheader();
720  // If BB has already been hoisted, return that
721  if (HoistDestinationMap.count(BB))
722  return HoistDestinationMap[BB];
723 
724  // Check if this block is conditional based on a pending branch
725  auto HasBBAsSuccessor =
727  return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
728  Pair.first->getSuccessor(1) == BB);
729  };
730  auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
731 
732  // If not involved in a pending branch, hoist to preheader
733  BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
734  if (It == HoistableBranches.end()) {
735  LLVM_DEBUG(dbgs() << "LICM using "
736  << InitialPreheader->getNameOrAsOperand()
737  << " as hoist destination for "
738  << BB->getNameOrAsOperand() << "\n");
739  HoistDestinationMap[BB] = InitialPreheader;
740  return InitialPreheader;
741  }
742  BranchInst *BI = It->first;
743  assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
744  HoistableBranches.end() &&
745  "BB is expected to be the target of at most one branch");
746 
747  LLVMContext &C = BB->getContext();
748  BasicBlock *TrueDest = BI->getSuccessor(0);
749  BasicBlock *FalseDest = BI->getSuccessor(1);
750  BasicBlock *CommonSucc = HoistableBranches[BI];
751  BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
752 
753  // Create hoisted versions of blocks that currently don't have them
754  auto CreateHoistedBlock = [&](BasicBlock *Orig) {
755  if (HoistDestinationMap.count(Orig))
756  return HoistDestinationMap[Orig];
757  BasicBlock *New =
758  BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
759  HoistDestinationMap[Orig] = New;
760  DT->addNewBlock(New, HoistTarget);
761  if (CurLoop->getParentLoop())
762  CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
763  ++NumCreatedBlocks;
764  LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
765  << " as hoist destination for " << Orig->getName()
766  << "\n");
767  return New;
768  };
769  BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
770  BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
771  BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
772 
773  // Link up these blocks with branches.
774  if (!HoistCommonSucc->getTerminator()) {
775  // The new common successor we've generated will branch to whatever that
776  // hoist target branched to.
777  BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
778  assert(TargetSucc && "Expected hoist target to have a single successor");
779  HoistCommonSucc->moveBefore(TargetSucc);
780  BranchInst::Create(TargetSucc, HoistCommonSucc);
781  }
782  if (!HoistTrueDest->getTerminator()) {
783  HoistTrueDest->moveBefore(HoistCommonSucc);
784  BranchInst::Create(HoistCommonSucc, HoistTrueDest);
785  }
786  if (!HoistFalseDest->getTerminator()) {
787  HoistFalseDest->moveBefore(HoistCommonSucc);
788  BranchInst::Create(HoistCommonSucc, HoistFalseDest);
789  }
790 
791  // If BI is being cloned to what was originally the preheader then
792  // HoistCommonSucc will now be the new preheader.
793  if (HoistTarget == InitialPreheader) {
794  // Phis in the loop header now need to use the new preheader.
795  InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
797  HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
798  // The new preheader dominates the loop header.
799  DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
800  DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
801  DT->changeImmediateDominator(HeaderNode, PreheaderNode);
802  // The preheader hoist destination is now the new preheader, with the
803  // exception of the hoist destination of this branch.
804  for (auto &Pair : HoistDestinationMap)
805  if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
806  Pair.second = HoistCommonSucc;
807  }
808 
809  // Now finally clone BI.
811  HoistTarget->getTerminator(),
812  BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
813  ++NumClonedBranches;
814 
815  assert(CurLoop->getLoopPreheader() &&
816  "Hoisting blocks should not have destroyed preheader");
817  return HoistDestinationMap[BB];
818  }
819 };
820 } // namespace
821 
822 // Hoisting/sinking instruction out of a loop isn't always beneficial. It's only
823 // only worthwhile if the destination block is actually colder than current
824 // block.
828  // Check block frequency only when runtime profile is available
829  // to avoid pathological cases. With static profile, lean towards
830  // hosting because it helps canonicalize the loop for vectorizer.
831  if (!DstBlock->getParent()->hasProfileData())
832  return true;
833 
835  return true;
836 
837  BasicBlock *SrcBlock = I.getParent();
838  if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold >
839  BFI->getBlockFreq(SrcBlock).getFrequency()) {
840  ORE->emit([&]() {
841  return OptimizationRemarkMissed(DEBUG_TYPE, "SinkHoistInst", &I)
842  << "failed to sink or hoist instruction because containing block "
843  "has lower frequency than destination block";
844  });
845  return false;
846  }
847 
848  return true;
849 }
850 
851 /// Walk the specified region of the CFG (defined by all blocks dominated by
852 /// the specified block, and that are in the current loop) in depth first
853 /// order w.r.t the DominatorTree. This allows us to visit definitions before
854 /// uses, allowing us to hoist a loop body in one pass without iteration.
855 ///
858  TargetLibraryInfo *TLI, Loop *CurLoop,
859  MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
860  ICFLoopSafetyInfo *SafetyInfo,
861  SinkAndHoistLICMFlags &Flags,
862  OptimizationRemarkEmitter *ORE, bool LoopNestMode) {
863  // Verify inputs.
864  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
865  CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
866  "Unexpected input to hoistRegion.");
867 
868  ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
869 
870  // Keep track of instructions that have been hoisted, as they may need to be
871  // re-hoisted if they end up not dominating all of their uses.
872  SmallVector<Instruction *, 16> HoistedInstructions;
873 
874  // For PHI hoisting to work we need to hoist blocks before their successors.
875  // We can do this by iterating through the blocks in the loop in reverse
876  // post-order.
877  LoopBlocksRPO Worklist(CurLoop);
878  Worklist.perform(LI);
879  bool Changed = false;
880  for (BasicBlock *BB : Worklist) {
881  // Only need to process the contents of this block if it is not part of a
882  // subloop (which would already have been processed).
883  if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
884  continue;
885 
887  // Try constant folding this instruction. If all the operands are
888  // constants, it is technically hoistable, but it would be better to
889  // just fold it.
891  &I, I.getModule()->getDataLayout(), TLI)) {
892  LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
893  << '\n');
894  // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
895  I.replaceAllUsesWith(C);
896  if (isInstructionTriviallyDead(&I, TLI))
897  eraseInstruction(I, *SafetyInfo, MSSAU);
898  Changed = true;
899  continue;
900  }
901 
902  // Try hoisting the instruction out to the preheader. We can only do
903  // this if all of the operands of the instruction are loop invariant and
904  // if it is safe to hoist the instruction. We also check block frequency
905  // to make sure instruction only gets hoisted into colder blocks.
906  // TODO: It may be safe to hoist if we are hoisting to a conditional block
907  // and we have accurately duplicated the control flow from the loop header
908  // to that block.
909  if (CurLoop->hasLoopInvariantOperands(&I) &&
910  canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU,
911  true, &Flags, ORE) &&
912  worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
914  I, DT, TLI, CurLoop, SafetyInfo, ORE,
915  CurLoop->getLoopPreheader()->getTerminator())) {
916  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
917  MSSAU, SE, ORE);
918  HoistedInstructions.push_back(&I);
919  Changed = true;
920  continue;
921  }
922 
923  // Attempt to remove floating point division out of the loop by
924  // converting it to a reciprocal multiplication.
925  if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
926  CurLoop->isLoopInvariant(I.getOperand(1))) {
927  auto Divisor = I.getOperand(1);
928  auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
929  auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
930  ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
931  SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
932  ReciprocalDivisor->insertBefore(&I);
933 
934  auto Product =
935  BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
936  Product->setFastMathFlags(I.getFastMathFlags());
937  SafetyInfo->insertInstructionTo(Product, I.getParent());
938  Product->insertAfter(&I);
939  I.replaceAllUsesWith(Product);
940  eraseInstruction(I, *SafetyInfo, MSSAU);
941 
942  hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
943  SafetyInfo, MSSAU, SE, ORE);
944  HoistedInstructions.push_back(ReciprocalDivisor);
945  Changed = true;
946  continue;
947  }
948 
949  auto IsInvariantStart = [&](Instruction &I) {
950  using namespace PatternMatch;
951  return I.use_empty() &&
952  match(&I, m_Intrinsic<Intrinsic::invariant_start>());
953  };
954  auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
955  return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
956  SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
957  };
958  if ((IsInvariantStart(I) || isGuard(&I)) &&
959  CurLoop->hasLoopInvariantOperands(&I) &&
960  MustExecuteWithoutWritesBefore(I)) {
961  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
962  MSSAU, SE, ORE);
963  HoistedInstructions.push_back(&I);
964  Changed = true;
965  continue;
966  }
967 
968  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
969  if (CFH.canHoistPHI(PN)) {
970  // Redirect incoming blocks first to ensure that we create hoisted
971  // versions of those blocks before we hoist the phi.
972  for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
973  PN->setIncomingBlock(
974  i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
975  hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
976  MSSAU, SE, ORE);
977  assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
978  Changed = true;
979  continue;
980  }
981  }
982 
983  // Remember possibly hoistable branches so we can actually hoist them
984  // later if needed.
985  if (BranchInst *BI = dyn_cast<BranchInst>(&I))
986  CFH.registerPossiblyHoistableBranch(BI);
987  }
988  }
989 
990  // If we hoisted instructions to a conditional block they may not dominate
991  // their uses that weren't hoisted (such as phis where some operands are not
992  // loop invariant). If so make them unconditional by moving them to their
993  // immediate dominator. We iterate through the instructions in reverse order
994  // which ensures that when we rehoist an instruction we rehoist its operands,
995  // and also keep track of where in the block we are rehoisting to to make sure
996  // that we rehoist instructions before the instructions that use them.
997  Instruction *HoistPoint = nullptr;
998  if (ControlFlowHoisting) {
999  for (Instruction *I : reverse(HoistedInstructions)) {
1000  if (!llvm::all_of(I->uses(),
1001  [&](Use &U) { return DT->dominates(I, U); })) {
1002  BasicBlock *Dominator =
1003  DT->getNode(I->getParent())->getIDom()->getBlock();
1004  if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
1005  if (HoistPoint)
1006  assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
1007  "New hoist point expected to dominate old hoist point");
1008  HoistPoint = Dominator->getTerminator();
1009  }
1010  LLVM_DEBUG(dbgs() << "LICM rehoisting to "
1011  << HoistPoint->getParent()->getNameOrAsOperand()
1012  << ": " << *I << "\n");
1013  moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
1014  HoistPoint = I;
1015  Changed = true;
1016  }
1017  }
1018  }
1019  if (VerifyMemorySSA)
1020  MSSAU->getMemorySSA()->verifyMemorySSA();
1021 
1022  // Now that we've finished hoisting make sure that LI and DT are still
1023  // valid.
1024 #ifdef EXPENSIVE_CHECKS
1025  if (Changed) {
1027  "Dominator tree verification failed");
1028  LI->verify(*DT);
1029  }
1030 #endif
1031 
1032  return Changed;
1033 }
1034 
1035 // Return true if LI is invariant within scope of the loop. LI is invariant if
1036 // CurLoop is dominated by an invariant.start representing the same memory
1037 // location and size as the memory location LI loads from, and also the
1038 // invariant.start has no uses.
1040  Loop *CurLoop) {
1041  Value *Addr = LI->getOperand(0);
1042  const DataLayout &DL = LI->getModule()->getDataLayout();
1043  const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
1044 
1045  // It is not currently possible for clang to generate an invariant.start
1046  // intrinsic with scalable vector types because we don't support thread local
1047  // sizeless types and we don't permit sizeless types in structs or classes.
1048  // Furthermore, even if support is added for this in future the intrinsic
1049  // itself is defined to have a size of -1 for variable sized objects. This
1050  // makes it impossible to verify if the intrinsic envelops our region of
1051  // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
1052  // types would have a -1 parameter, but the former is clearly double the size
1053  // of the latter.
1054  if (LocSizeInBits.isScalable())
1055  return false;
1056 
1057  // if the type is i8 addrspace(x)*, we know this is the type of
1058  // llvm.invariant.start operand
1059  auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
1060  LI->getPointerAddressSpace());
1061  unsigned BitcastsVisited = 0;
1062  // Look through bitcasts until we reach the i8* type (this is invariant.start
1063  // operand type).
1064  while (Addr->getType() != PtrInt8Ty) {
1065  auto *BC = dyn_cast<BitCastInst>(Addr);
1066  // Avoid traversing high number of bitcast uses.
1067  if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
1068  return false;
1069  Addr = BC->getOperand(0);
1070  }
1071  // If we've ended up at a global/constant, bail. We shouldn't be looking at
1072  // uselists for non-local Values in a loop pass.
1073  if (isa<Constant>(Addr))
1074  return false;
1075 
1076  unsigned UsesVisited = 0;
1077  // Traverse all uses of the load operand value, to see if invariant.start is
1078  // one of the uses, and whether it dominates the load instruction.
1079  for (auto *U : Addr->users()) {
1080  // Avoid traversing for Load operand with high number of users.
1081  if (++UsesVisited > MaxNumUsesTraversed)
1082  return false;
1083  IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
1084  // If there are escaping uses of invariant.start instruction, the load maybe
1085  // non-invariant.
1086  if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
1087  !II->use_empty())
1088  continue;
1089  ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
1090  // The intrinsic supports having a -1 argument for variable sized objects
1091  // so we should check for that here.
1092  if (InvariantSize->isNegative())
1093  continue;
1094  uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
1095  // Confirm the invariant.start location size contains the load operand size
1096  // in bits. Also, the invariant.start should dominate the load, and we
1097  // should not hoist the load out of a loop that contains this dominating
1098  // invariant.start.
1099  if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits &&
1100  DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
1101  return true;
1102  }
1103 
1104  return false;
1105 }
1106 
1107 namespace {
1108 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1109 /// sink a given instruction out of a loop. Does not address legality
1110 /// concerns such as aliasing or speculation safety.
1111 bool isHoistableAndSinkableInst(Instruction &I) {
1112  // Only these instructions are hoistable/sinkable.
1113  return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
1114  isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
1115  isa<BinaryOperator>(I) || isa<SelectInst>(I) ||
1116  isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
1117  isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
1118  isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
1119  isa<InsertValueInst>(I) || isa<FreezeInst>(I));
1120 }
1121 /// Return true if all of the alias sets within this AST are known not to
1122 /// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop.
1123 bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
1124  const Loop *L) {
1125  if (CurAST) {
1126  for (AliasSet &AS : *CurAST) {
1127  if (!AS.isForwardingAliasSet() && AS.isMod()) {
1128  return false;
1129  }
1130  }
1131  return true;
1132  } else { /*MSSAU*/
1133  for (auto *BB : L->getBlocks())
1134  if (MSSAU->getMemorySSA()->getBlockDefs(BB))
1135  return false;
1136  return true;
1137  }
1138 }
1139 
1140 /// Return true if I is the only Instruction with a MemoryAccess in L.
1141 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1142  const MemorySSAUpdater *MSSAU) {
1143  for (auto *BB : L->getBlocks())
1144  if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
1145  int NotAPhi = 0;
1146  for (const auto &Acc : *Accs) {
1147  if (isa<MemoryPhi>(&Acc))
1148  continue;
1149  const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1150  if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1151  return false;
1152  }
1153  }
1154  return true;
1155 }
1156 }
1157 
1159  Loop *CurLoop, AliasSetTracker *CurAST,
1160  MemorySSAUpdater *MSSAU,
1161  bool TargetExecutesOncePerLoop,
1162  SinkAndHoistLICMFlags *Flags,
1164  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
1165  "Either AliasSetTracker or MemorySSA should be initialized.");
1166 
1167  // If we don't understand the instruction, bail early.
1168  if (!isHoistableAndSinkableInst(I))
1169  return false;
1170 
1171  MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
1172  if (MSSA)
1173  assert(Flags != nullptr && "Flags cannot be null.");
1174 
1175  // Loads have extra constraints we have to verify before we can hoist them.
1176  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1177  if (!LI->isUnordered())
1178  return false; // Don't sink/hoist volatile or ordered atomic loads!
1179 
1180  // Loads from constant memory are always safe to move, even if they end up
1181  // in the same alias set as something that ends up being modified.
1182  if (AA->pointsToConstantMemory(LI->getOperand(0)))
1183  return true;
1184  if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1185  return true;
1186 
1187  if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1188  return false; // Don't risk duplicating unordered loads
1189 
1190  // This checks for an invariant.start dominating the load.
1191  if (isLoadInvariantInLoop(LI, DT, CurLoop))
1192  return true;
1193 
1194  bool Invalidated;
1195  if (CurAST)
1196  Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
1197  CurLoop, AA);
1198  else
1199  Invalidated = pointerInvalidatedByLoopWithMSSA(
1200  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags);
1201  // Check loop-invariant address because this may also be a sinkable load
1202  // whose address is not necessarily loop-invariant.
1203  if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1204  ORE->emit([&]() {
1205  return OptimizationRemarkMissed(
1206  DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1207  << "failed to move load with loop-invariant address "
1208  "because the loop may invalidate its value";
1209  });
1210 
1211  return !Invalidated;
1212  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1213  // Don't sink or hoist dbg info; it's legal, but not useful.
1214  if (isa<DbgInfoIntrinsic>(I))
1215  return false;
1216 
1217  // Don't sink calls which can throw.
1218  if (CI->mayThrow())
1219  return false;
1220 
1221  // Convergent attribute has been used on operations that involve
1222  // inter-thread communication which results are implicitly affected by the
1223  // enclosing control flows. It is not safe to hoist or sink such operations
1224  // across control flow.
1225  if (CI->isConvergent())
1226  return false;
1227 
1228  using namespace PatternMatch;
1229  if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1230  // Assumes don't actually alias anything or throw
1231  return true;
1232 
1233  if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
1234  // Widenable conditions don't actually alias anything or throw
1235  return true;
1236 
1237  // Handle simple cases by querying alias analysis.
1238  FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
1239  if (Behavior == FMRB_DoesNotAccessMemory)
1240  return true;
1241  if (AAResults::onlyReadsMemory(Behavior)) {
1242  // A readonly argmemonly function only reads from memory pointed to by
1243  // it's arguments with arbitrary offsets. If we can prove there are no
1244  // writes to this memory in the loop, we can hoist or sink.
1245  if (AAResults::onlyAccessesArgPointees(Behavior)) {
1246  // TODO: expand to writeable arguments
1247  for (Value *Op : CI->args())
1248  if (Op->getType()->isPointerTy()) {
1249  bool Invalidated;
1250  if (CurAST)
1251  Invalidated = pointerInvalidatedByLoop(
1252  MemoryLocation::getBeforeOrAfter(Op), CurAST, CurLoop, AA);
1253  else
1254  Invalidated = pointerInvalidatedByLoopWithMSSA(
1255  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
1256  *Flags);
1257  if (Invalidated)
1258  return false;
1259  }
1260  return true;
1261  }
1262 
1263  // If this call only reads from memory and there are no writes to memory
1264  // in the loop, we can hoist or sink the call as appropriate.
1265  if (isReadOnly(CurAST, MSSAU, CurLoop))
1266  return true;
1267  }
1268 
1269  // FIXME: This should use mod/ref information to see if we can hoist or
1270  // sink the call.
1271 
1272  return false;
1273  } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1274  // Fences alias (most) everything to provide ordering. For the moment,
1275  // just give up if there are any other memory operations in the loop.
1276  if (CurAST) {
1277  auto Begin = CurAST->begin();
1278  assert(Begin != CurAST->end() && "must contain FI");
1279  if (std::next(Begin) != CurAST->end())
1280  // constant memory for instance, TODO: handle better
1281  return false;
1282  auto *UniqueI = Begin->getUniqueInstruction();
1283  if (!UniqueI)
1284  // other memory op, give up
1285  return false;
1286  (void)FI; // suppress unused variable warning
1287  assert(UniqueI == FI && "AS must contain FI");
1288  return true;
1289  } else // MSSAU
1290  return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1291  } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1292  if (!SI->isUnordered())
1293  return false; // Don't sink/hoist volatile or ordered atomic store!
1294 
1295  // We can only hoist a store that we can prove writes a value which is not
1296  // read or overwritten within the loop. For those cases, we fallback to
1297  // load store promotion instead. TODO: We can extend this to cases where
1298  // there is exactly one write to the location and that write dominates an
1299  // arbitrary number of reads in the loop.
1300  if (CurAST) {
1301  auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
1302 
1303  if (AS.isRef() || !AS.isMustAlias())
1304  // Quick exit test, handled by the full path below as well.
1305  return false;
1306  auto *UniqueI = AS.getUniqueInstruction();
1307  if (!UniqueI)
1308  // other memory op, give up
1309  return false;
1310  assert(UniqueI == SI && "AS must contain SI");
1311  return true;
1312  } else { // MSSAU
1313  if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1314  return true;
1315  // If there are more accesses than the Promotion cap or no "quota" to
1316  // check clobber, then give up as we're not walking a list that long.
1317  if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls())
1318  return false;
1319  // If there are interfering Uses (i.e. their defining access is in the
1320  // loop), or ordered loads (stored as Defs!), don't move this store.
1321  // Could do better here, but this is conservatively correct.
1322  // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1323  // moving accesses. Can also extend to dominating uses.
1324  auto *SIMD = MSSA->getMemoryAccess(SI);
1325  for (auto *BB : CurLoop->getBlocks())
1326  if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
1327  for (const auto &MA : *Accesses)
1328  if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1329  auto *MD = MU->getDefiningAccess();
1330  if (!MSSA->isLiveOnEntryDef(MD) &&
1331  CurLoop->contains(MD->getBlock()))
1332  return false;
1333  // Disable hoisting past potentially interfering loads. Optimized
1334  // Uses may point to an access outside the loop, as getClobbering
1335  // checks the previous iteration when walking the backedge.
1336  // FIXME: More precise: no Uses that alias SI.
1337  if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU))
1338  return false;
1339  } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
1340  if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
1341  (void)LI; // Silence warning.
1342  assert(!LI->isUnordered() && "Expected unordered load");
1343  return false;
1344  }
1345  // Any call, while it may not be clobbering SI, it may be a use.
1346  if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
1347  // Check if the call may read from the memory location written
1348  // to by SI. Check CI's attributes and arguments; the number of
1349  // such checks performed is limited above by NoOfMemAccTooLarge.
1351  if (isModOrRefSet(MRI))
1352  return false;
1353  }
1354  }
1355  }
1357  Flags->incrementClobberingCalls();
1358  // If there are no clobbering Defs in the loop, store is safe to hoist.
1359  return MSSA->isLiveOnEntryDef(Source) ||
1360  !CurLoop->contains(Source->getBlock());
1361  }
1362  }
1363 
1364  assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1365 
1366  // We've established mechanical ability and aliasing, it's up to the caller
1367  // to check fault safety
1368  return true;
1369 }
1370 
1371 /// Returns true if a PHINode is a trivially replaceable with an
1372 /// Instruction.
1373 /// This is true when all incoming values are that instruction.
1374 /// This pattern occurs most often with LCSSA PHI nodes.
1375 ///
1376 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1377  for (const Value *IncValue : PN.incoming_values())
1378  if (IncValue != &I)
1379  return false;
1380 
1381  return true;
1382 }
1383 
1384 /// Return true if the instruction is free in the loop.
1385 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1386  const TargetTransformInfo *TTI) {
1387 
1388  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1389  if (TTI->getUserCost(GEP, TargetTransformInfo::TCK_SizeAndLatency) !=
1390  TargetTransformInfo::TCC_Free)
1391  return false;
1392  // For a GEP, we cannot simply use getUserCost because currently it
1393  // optimistically assume that a GEP will fold into addressing mode
1394  // regardless of its users.
1395  const BasicBlock *BB = GEP->getParent();
1396  for (const User *U : GEP->users()) {
1397  const Instruction *UI = cast<Instruction>(U);
1398  if (CurLoop->contains(UI) &&
1399  (BB != UI->getParent() ||
1400  (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1401  return false;
1402  }
1403  return true;
1404  } else
1405  return TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
1406  TargetTransformInfo::TCC_Free;
1407 }
1408 
1409 /// Return true if the only users of this instruction are outside of
1410 /// the loop. If this is true, we can sink the instruction to the exit
1411 /// blocks of the loop.
1412 ///
1413 /// We also return true if the instruction could be folded away in lowering.
1414 /// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1415 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1416  const LoopSafetyInfo *SafetyInfo,
1417  TargetTransformInfo *TTI, bool &FreeInLoop,
1418  bool LoopNestMode) {
1419  const auto &BlockColors = SafetyInfo->getBlockColors();
1420  bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1421  for (const User *U : I.users()) {
1422  const Instruction *UI = cast<Instruction>(U);
1423  if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1424  const BasicBlock *BB = PN->getParent();
1425  // We cannot sink uses in catchswitches.
1426  if (isa<CatchSwitchInst>(BB->getTerminator()))
1427  return false;
1428 
1429  // We need to sink a callsite to a unique funclet. Avoid sinking if the
1430  // phi use is too muddled.
1431  if (isa<CallInst>(I))
1432  if (!BlockColors.empty() &&
1433  BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1434  return false;
1435 
1436  if (LoopNestMode) {
1437  while (isa<PHINode>(UI) && UI->hasOneUser() &&
1438  UI->getNumOperands() == 1) {
1439  if (!CurLoop->contains(UI))
1440  break;
1441  UI = cast<Instruction>(UI->user_back());
1442  }
1443  }
1444  }
1445 
1446  if (CurLoop->contains(UI)) {
1447  if (IsFree) {
1448  FreeInLoop = true;
1449  continue;
1450  }
1451  return false;
1452  }
1453  }
1454  return true;
1455 }
1456 
1458  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1459  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
1460  Instruction *New;
1461  if (auto *CI = dyn_cast<CallInst>(&I)) {
1462  const auto &BlockColors = SafetyInfo->getBlockColors();
1463 
1464  // Sinking call-sites need to be handled differently from other
1465  // instructions. The cloned call-site needs a funclet bundle operand
1466  // appropriate for its location in the CFG.
1468  for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1469  BundleIdx != BundleEnd; ++BundleIdx) {
1470  OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1471  if (Bundle.getTagID() == LLVMContext::OB_funclet)
1472  continue;
1473 
1474  OpBundles.emplace_back(Bundle);
1475  }
1476 
1477  if (!BlockColors.empty()) {
1478  const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1479  assert(CV.size() == 1 && "non-unique color for exit block!");
1480  BasicBlock *BBColor = CV.front();
1481  Instruction *EHPad = BBColor->getFirstNonPHI();
1482  if (EHPad->isEHPad())
1483  OpBundles.emplace_back("funclet", EHPad);
1484  }
1485 
1486  New = CallInst::Create(CI, OpBundles);
1487  } else {
1488  New = I.clone();
1489  }
1490 
1491  ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
1492  if (!I.getName().empty())
1493  New->setName(I.getName() + ".le");
1494 
1495  if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
1496  // Create a new MemoryAccess and let MemorySSA set its defining access.
1497  MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1498  New, nullptr, New->getParent(), MemorySSA::Beginning);
1499  if (NewMemAcc) {
1500  if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1501  MSSAU->insertDef(MemDef, /*RenameUses=*/true);
1502  else {
1503  auto *MemUse = cast<MemoryUse>(NewMemAcc);
1504  MSSAU->insertUse(MemUse, /*RenameUses=*/true);
1505  }
1506  }
1507  }
1508 
1509  // Build LCSSA PHI nodes for any in-loop operands (if legal). Note that
1510  // this is particularly cheap because we can rip off the PHI node that we're
1511  // replacing for the number and blocks of the predecessors.
1512  // OPT: If this shows up in a profile, we can instead finish sinking all
1513  // invariant instructions, and then walk their operands to re-establish
1514  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1515  // sinking bottom-up.
1516  for (Use &Op : New->operands())
1517  if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
1518  auto *OInst = cast<Instruction>(Op.get());
1519  PHINode *OpPN =
1520  PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1521  OInst->getName() + ".lcssa", &ExitBlock.front());
1522  for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1523  OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1524  Op = OpPN;
1525  }
1526  return New;
1527 }
1528 
1530  MemorySSAUpdater *MSSAU) {
1531  if (MSSAU)
1532  MSSAU->removeMemoryAccess(&I);
1533  SafetyInfo.removeInstruction(&I);
1534  I.eraseFromParent();
1535 }
1536 
1538  ICFLoopSafetyInfo &SafetyInfo,
1539  MemorySSAUpdater *MSSAU,
1540  ScalarEvolution *SE) {
1541  SafetyInfo.removeInstruction(&I);
1542  SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1543  I.moveBefore(&Dest);
1544  if (MSSAU)
1545  if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1546  MSSAU->getMemorySSA()->getMemoryAccess(&I)))
1547  MSSAU->moveToPlace(OldMemAcc, Dest.getParent(),
1548  MemorySSA::BeforeTerminator);
1549  if (SE)
1550  SE->forgetValue(&I);
1551 }
1552 
1554  PHINode *TPN, Instruction *I, LoopInfo *LI,
1556  const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1557  MemorySSAUpdater *MSSAU) {
1559  "Expect only trivially replaceable PHI");
1560  BasicBlock *ExitBlock = TPN->getParent();
1561  Instruction *New;
1562  auto It = SunkCopies.find(ExitBlock);
1563  if (It != SunkCopies.end())
1564  New = It->second;
1565  else
1566  New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock(
1567  *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1568  return New;
1569 }
1570 
1571 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1572  BasicBlock *BB = PN->getParent();
1573  if (!BB->canSplitPredecessors())
1574  return false;
1575  // It's not impossible to split EHPad blocks, but if BlockColors already exist
1576  // it require updating BlockColors for all offspring blocks accordingly. By
1577  // skipping such corner case, we can make updating BlockColors after splitting
1578  // predecessor fairly simple.
1579  if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1580  return false;
1581  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
1582  BasicBlock *BBPred = *PI;
1583  if (isa<IndirectBrInst>(BBPred->getTerminator()) ||
1584  isa<CallBrInst>(BBPred->getTerminator()))
1585  return false;
1586  }
1587  return true;
1588 }
1589 
1591  LoopInfo *LI, const Loop *CurLoop,
1592  LoopSafetyInfo *SafetyInfo,
1593  MemorySSAUpdater *MSSAU) {
1594 #ifndef NDEBUG
1595  SmallVector<BasicBlock *, 32> ExitBlocks;
1596  CurLoop->getUniqueExitBlocks(ExitBlocks);
1597  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1598  ExitBlocks.end());
1599 #endif
1600  BasicBlock *ExitBB = PN->getParent();
1601  assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1602 
1603  // Split predecessors of the loop exit to make instructions in the loop are
1604  // exposed to exit blocks through trivially replaceable PHIs while keeping the
1605  // loop in the canonical form where each predecessor of each exit block should
1606  // be contained within the loop. For example, this will convert the loop below
1607  // from
1608  //
1609  // LB1:
1610  // %v1 =
1611  // br %LE, %LB2
1612  // LB2:
1613  // %v2 =
1614  // br %LE, %LB1
1615  // LE:
1616  // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1617  //
1618  // to
1619  //
1620  // LB1:
1621  // %v1 =
1622  // br %LE.split, %LB2
1623  // LB2:
1624  // %v2 =
1625  // br %LE.split2, %LB1
1626  // LE.split:
1627  // %p1 = phi [%v1, %LB1] <-- trivially replaceable
1628  // br %LE
1629  // LE.split2:
1630  // %p2 = phi [%v2, %LB2] <-- trivially replaceable
1631  // br %LE
1632  // LE:
1633  // %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1634  //
1635  const auto &BlockColors = SafetyInfo->getBlockColors();
1636  SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1637  while (!PredBBs.empty()) {
1638  BasicBlock *PredBB = *PredBBs.begin();
1639  assert(CurLoop->contains(PredBB) &&
1640  "Expect all predecessors are in the loop");
1641  if (PN->getBasicBlockIndex(PredBB) >= 0) {
1643  ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1644  // Since we do not allow splitting EH-block with BlockColors in
1645  // canSplitPredecessors(), we can simply assign predecessor's color to
1646  // the new block.
1647  if (!BlockColors.empty())
1648  // Grab a reference to the ColorVector to be inserted before getting the
1649  // reference to the vector we are copying because inserting the new
1650  // element in BlockColors might cause the map to be reallocated.
1651  SafetyInfo->copyColors(NewPred, PredBB);
1652  }
1653  PredBBs.remove(PredBB);
1654  }
1655 }
1656 
1657 /// When an instruction is found to only be used outside of the loop, this
1658 /// function moves it to the exit blocks and patches up SSA form as needed.
1659 /// This method is guaranteed to remove the original instruction from its
1660 /// position, and may either delete it or move it to outside of the loop.
1661 ///
1662 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1663  BlockFrequencyInfo *BFI, const Loop *CurLoop,
1664  ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
1666  bool Changed = false;
1667  LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1668 
1669  // Iterate over users to be ready for actual sinking. Replace users via
1670  // unreachable blocks with undef and make all user PHIs trivially replaceable.
1671  SmallPtrSet<Instruction *, 8> VisitedUsers;
1672  for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1673  auto *User = cast<Instruction>(*UI);
1674  Use &U = UI.getUse();
1675  ++UI;
1676 
1677  if (VisitedUsers.count(User) || CurLoop->contains(User))
1678  continue;
1679 
1680  if (!DT->isReachableFromEntry(User->getParent())) {
1681  U = UndefValue::get(I.getType());
1682  Changed = true;
1683  continue;
1684  }
1685 
1686  // The user must be a PHI node.
1687  PHINode *PN = cast<PHINode>(User);
1688 
1689  // Surprisingly, instructions can be used outside of loops without any
1690  // exits. This can only happen in PHI nodes if the incoming block is
1691  // unreachable.
1692  BasicBlock *BB = PN->getIncomingBlock(U);
1693  if (!DT->isReachableFromEntry(BB)) {
1694  U = UndefValue::get(I.getType());
1695  Changed = true;
1696  continue;
1697  }
1698 
1699  VisitedUsers.insert(PN);
1700  if (isTriviallyReplaceablePHI(*PN, I))
1701  continue;
1702 
1703  if (!canSplitPredecessors(PN, SafetyInfo))
1704  return Changed;
1705 
1706  // Split predecessors of the PHI so that we can make users trivially
1707  // replaceable.
1708  splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
1709 
1710  // Should rebuild the iterators, as they may be invalidated by
1711  // splitPredecessorsOfLoopExit().
1712  UI = I.user_begin();
1713  UE = I.user_end();
1714  }
1715 
1716  if (VisitedUsers.empty())
1717  return Changed;
1718 
1719  ORE->emit([&]() {
1720  return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1721  << "sinking " << ore::NV("Inst", &I);
1722  });
1723  if (isa<LoadInst>(I))
1724  ++NumMovedLoads;
1725  else if (isa<CallInst>(I))
1726  ++NumMovedCalls;
1727  ++NumSunk;
1728 
1729 #ifndef NDEBUG
1730  SmallVector<BasicBlock *, 32> ExitBlocks;
1731  CurLoop->getUniqueExitBlocks(ExitBlocks);
1732  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1733  ExitBlocks.end());
1734 #endif
1735 
1736  // Clones of this instruction. Don't create more than one per exit block!
1738 
1739  // If this instruction is only used outside of the loop, then all users are
1740  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1741  // the instruction.
1742  // First check if I is worth sinking for all uses. Sink only when it is worth
1743  // across all uses.
1744  SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
1745  SmallVector<PHINode *, 8> ExitPNs;
1746  for (auto *UI : Users) {
1747  auto *User = cast<Instruction>(UI);
1748 
1749  if (CurLoop->contains(User))
1750  continue;
1751 
1752  PHINode *PN = cast<PHINode>(User);
1753  assert(ExitBlockSet.count(PN->getParent()) &&
1754  "The LCSSA PHI is not in an exit block!");
1755  if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) {
1756  return Changed;
1757  }
1758 
1759  ExitPNs.push_back(PN);
1760  }
1761 
1762  for (auto *PN : ExitPNs) {
1763 
1764  // The PHI must be trivially replaceable.
1766  PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1767  PN->replaceAllUsesWith(New);
1768  eraseInstruction(*PN, *SafetyInfo, nullptr);
1769  Changed = true;
1770  }
1771  return Changed;
1772 }
1773 
1774 /// When an instruction is found to only use loop invariant operands that
1775 /// is safe to hoist, this instruction is called to do the dirty work.
1776 ///
1777 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1778  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1779  MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
1781  LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
1782  << I << "\n");
1783  ORE->emit([&]() {
1784  return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1785  << ore::NV("Inst", &I);
1786  });
1787 
1788  // Metadata can be dependent on conditions we are hoisting above.
1789  // Conservatively strip all metadata on the instruction unless we were
1790  // guaranteed to execute I if we entered the loop, in which case the metadata
1791  // is valid in the loop preheader.
1792  // Similarly, If I is a call and it is not guaranteed to execute in the loop,
1793  // then moving to the preheader means we should strip attributes on the call
1794  // that can cause UB since we may be hoisting above conditions that allowed
1795  // inferring those attributes. They may not be valid at the preheader.
1796  if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(I)) &&
1797  // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1798  // time in isGuaranteedToExecute if we don't actually have anything to
1799  // drop. It is a compile time optimization, not required for correctness.
1800  !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1801  I.dropUndefImplyingAttrsAndUnknownMetadata();
1802 
1803  if (isa<PHINode>(I))
1804  // Move the new node to the end of the phi list in the destination block.
1805  moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE);
1806  else
1807  // Move the new node to the destination block, before its terminator.
1808  moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
1809 
1810  I.updateLocationAfterHoist();
1811 
1812  if (isa<LoadInst>(I))
1813  ++NumMovedLoads;
1814  else if (isa<CallInst>(I))
1815  ++NumMovedCalls;
1816  ++NumHoisted;
1817 }
1818 
1819 /// Only sink or hoist an instruction if it is not a trapping instruction,
1820 /// or if the instruction is known not to trap when moved to the preheader.
1821 /// or if it is a trapping instruction and is guaranteed to execute.
1823  const DominatorTree *DT,
1824  const TargetLibraryInfo *TLI,
1825  const Loop *CurLoop,
1826  const LoopSafetyInfo *SafetyInfo,
1828  const Instruction *CtxI) {
1829  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
1830  return true;
1831 
1832  bool GuaranteedToExecute =
1833  SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1834 
1835  if (!GuaranteedToExecute) {
1836  auto *LI = dyn_cast<LoadInst>(&Inst);
1837  if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1838  ORE->emit([&]() {
1839  return OptimizationRemarkMissed(
1840  DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1841  << "failed to hoist load with loop-invariant address "
1842  "because load is conditionally executed";
1843  });
1844  }
1845 
1846  return GuaranteedToExecute;
1847 }
1848 
1849 namespace {
1850 class LoopPromoter : public LoadAndStorePromoter {
1851  Value *SomePtr; // Designated pointer to store to.
1852  const SmallSetVector<Value *, 8> &PointerMustAliases;
1853  SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1854  SmallVectorImpl<Instruction *> &LoopInsertPts;
1855  SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1856  PredIteratorCache &PredCache;
1857  MemorySSAUpdater *MSSAU;
1858  LoopInfo &LI;
1859  DebugLoc DL;
1860  Align Alignment;
1861  bool UnorderedAtomic;
1862  AAMDNodes AATags;
1863  ICFLoopSafetyInfo &SafetyInfo;
1864 
1865  // We're about to add a use of V in a loop exit block. Insert an LCSSA phi
1866  // (if legal) if doing so would add an out-of-loop use to an instruction
1867  // defined in-loop.
1868  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1870  return V;
1871 
1872  Instruction *I = cast<Instruction>(V);
1873  // We need to create an LCSSA PHI node for the incoming value and
1874  // store that.
1875  PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1876  I->getName() + ".lcssa", &BB->front());
1877  for (BasicBlock *Pred : PredCache.get(BB))
1878  PN->addIncoming(I, Pred);
1879  return PN;
1880  }
1881 
1882 public:
1883  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1884  const SmallSetVector<Value *, 8> &PMA,
1888  MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
1889  Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1890  ICFLoopSafetyInfo &SafetyInfo)
1891  : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
1892  LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
1893  PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
1894  Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1895  SafetyInfo(SafetyInfo) {}
1896 
1897  bool isInstInList(Instruction *I,
1898  const SmallVectorImpl<Instruction *> &) const override {
1899  Value *Ptr;
1900  if (LoadInst *LI = dyn_cast<LoadInst>(I))
1901  Ptr = LI->getOperand(0);
1902  else
1903  Ptr = cast<StoreInst>(I)->getPointerOperand();
1904  return PointerMustAliases.count(Ptr);
1905  }
1906 
1907  void doExtraRewritesBeforeFinalDeletion() override {
1908  // Insert stores after in the loop exit blocks. Each exit block gets a
1909  // store of the live-out values that feed them. Since we've already told
1910  // the SSA updater about the defs in the loop and the preheader
1911  // definition, it is all set and we can start using it.
1912  for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1913  BasicBlock *ExitBlock = LoopExitBlocks[i];
1914  Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1915  LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1916  Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1917  Instruction *InsertPos = LoopInsertPts[i];
1918  StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1919  if (UnorderedAtomic)
1921  NewSI->setAlignment(Alignment);
1922  NewSI->setDebugLoc(DL);
1923  if (AATags)
1924  NewSI->setAAMetadata(AATags);
1925 
1926  MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1927  MemoryAccess *NewMemAcc;
1928  if (!MSSAInsertPoint) {
1929  NewMemAcc = MSSAU->createMemoryAccessInBB(
1930  NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1931  } else {
1932  NewMemAcc =
1933  MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1934  }
1935  MSSAInsertPts[i] = NewMemAcc;
1936  MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1937  // FIXME: true for safety, false may still be correct.
1938  }
1939  }
1940 
1941  void instructionDeleted(Instruction *I) const override {
1942  SafetyInfo.removeInstruction(I);
1943  MSSAU->removeMemoryAccess(I);
1944  }
1945 };
1946 
1947 bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
1948  DominatorTree *DT) {
1949  // We can perform the captured-before check against any instruction in the
1950  // loop header, as the loop header is reachable from any instruction inside
1951  // the loop.
1952  // TODO: ReturnCaptures=true shouldn't be necessary here.
1953  return !PointerMayBeCapturedBefore(V, /* ReturnCaptures */ true,
1954  /* StoreCaptures */ true,
1955  L->getHeader()->getTerminator(), DT);
1956 }
1957 
1958 /// Return true iff we can prove that a caller of this function can not inspect
1959 /// the contents of the provided object in a well defined program.
1960 bool isKnownNonEscaping(Value *Object, const Loop *L,
1961  const TargetLibraryInfo *TLI, DominatorTree *DT) {
1962  if (isa<AllocaInst>(Object))
1963  // Since the alloca goes out of scope, we know the caller can't retain a
1964  // reference to it and be well defined. Thus, we don't need to check for
1965  // capture.
1966  return true;
1967 
1968  // For all other objects we need to know that the caller can't possibly
1969  // have gotten a reference to the object. There are two components of
1970  // that:
1971  // 1) Object can't be escaped by this function. This is what
1972  // PointerMayBeCaptured checks.
1973  // 2) Object can't have been captured at definition site. For this, we
1974  // need to know the return value is noalias. At the moment, we use a
1975  // weaker condition and handle only AllocLikeFunctions (which are
1976  // known to be noalias). TODO
1977  return isAllocLikeFn(Object, TLI) &&
1978  isNotCapturedBeforeOrInLoop(Object, L, DT);
1979 }
1980 
1981 } // namespace
1982 
1983 /// Try to promote memory values to scalars by sinking stores out of the
1984 /// loop and moving loads to before the loop. We do this by looping over
1985 /// the stores in the loop, looking for stores to Must pointers which are
1986 /// loop invariant.
1987 ///
1989  const SmallSetVector<Value *, 8> &PointerMustAliases,
1990  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1991  SmallVectorImpl<Instruction *> &InsertPts,
1993  LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1994  Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
1996  // Verify inputs.
1997  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1998  SafetyInfo != nullptr &&
1999  "Unexpected Input to promoteLoopAccessesToScalars");
2000 
2001  Value *SomePtr = *PointerMustAliases.begin();
2002  BasicBlock *Preheader = CurLoop->getLoopPreheader();
2003 
2004  // It is not safe to promote a load/store from the loop if the load/store is
2005  // conditional. For example, turning:
2006  //
2007  // for () { if (c) *P += 1; }
2008  //
2009  // into:
2010  //
2011  // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
2012  //
2013  // is not safe, because *P may only be valid to access if 'c' is true.
2014  //
2015  // The safety property divides into two parts:
2016  // p1) The memory may not be dereferenceable on entry to the loop. In this
2017  // case, we can't insert the required load in the preheader.
2018  // p2) The memory model does not allow us to insert a store along any dynamic
2019  // path which did not originally have one.
2020  //
2021  // If at least one store is guaranteed to execute, both properties are
2022  // satisfied, and promotion is legal.
2023  //
2024  // This, however, is not a necessary condition. Even if no store/load is
2025  // guaranteed to execute, we can still establish these properties.
2026  // We can establish (p1) by proving that hoisting the load into the preheader
2027  // is safe (i.e. proving dereferenceability on all paths through the loop). We
2028  // can use any access within the alias set to prove dereferenceability,
2029  // since they're all must alias.
2030  //
2031  // There are two ways establish (p2):
2032  // a) Prove the location is thread-local. In this case the memory model
2033  // requirement does not apply, and stores are safe to insert.
2034  // b) Prove a store dominates every exit block. In this case, if an exit
2035  // blocks is reached, the original dynamic path would have taken us through
2036  // the store, so inserting a store into the exit block is safe. Note that this
2037  // is different from the store being guaranteed to execute. For instance,
2038  // if an exception is thrown on the first iteration of the loop, the original
2039  // store is never executed, but the exit blocks are not executed either.
2040 
2041  bool DereferenceableInPH = false;
2042  bool SafeToInsertStore = false;
2043 
2045 
2046  // We start with an alignment of one and try to find instructions that allow
2047  // us to prove better alignment.
2048  Align Alignment;
2049  // Keep track of which types of access we see
2050  bool SawUnorderedAtomic = false;
2051  bool SawNotAtomic = false;
2052  AAMDNodes AATags;
2053 
2054  const DataLayout &MDL = Preheader->getModule()->getDataLayout();
2055 
2056  bool IsKnownThreadLocalObject = false;
2057  if (SafetyInfo->anyBlockMayThrow()) {
2058  // If a loop can throw, we have to insert a store along each unwind edge.
2059  // That said, we can't actually make the unwind edge explicit. Therefore,
2060  // we have to prove that the store is dead along the unwind edge. We do
2061  // this by proving that the caller can't have a reference to the object
2062  // after return and thus can't possibly load from the object.
2063  Value *Object = getUnderlyingObject(SomePtr);
2064  if (!isKnownNonEscaping(Object, CurLoop, TLI, DT))
2065  return false;
2066  // Subtlety: Alloca's aren't visible to callers, but *are* potentially
2067  // visible to other threads if captured and used during their lifetimes.
2068  IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
2069  }
2070 
2071  // Check that all of the pointers in the alias set have the same type. We
2072  // cannot (yet) promote a memory location that is loaded and stored in
2073  // different sizes. While we are at it, collect alignment and AA info.
2074  for (Value *ASIV : PointerMustAliases) {
2075  // Check that all of the pointers in the alias set have the same type. We
2076  // cannot (yet) promote a memory location that is loaded and stored in
2077  // different sizes.
2078  if (SomePtr->getType() != ASIV->getType())
2079  return false;
2080 
2081  for (User *U : ASIV->users()) {
2082  // Ignore instructions that are outside the loop.
2083  Instruction *UI = dyn_cast<Instruction>(U);
2084  if (!UI || !CurLoop->contains(UI))
2085  continue;
2086 
2087  // If there is an non-load/store instruction in the loop, we can't promote
2088  // it.
2089  if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
2090  if (!Load->isUnordered())
2091  return false;
2092 
2093  SawUnorderedAtomic |= Load->isAtomic();
2094  SawNotAtomic |= !Load->isAtomic();
2095 
2096  Align InstAlignment = Load->getAlign();
2097 
2098  // Note that proving a load safe to speculate requires proving
2099  // sufficient alignment at the target location. Proving it guaranteed
2100  // to execute does as well. Thus we can increase our guaranteed
2101  // alignment as well.
2102  if (!DereferenceableInPH || (InstAlignment > Alignment))
2103  if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop,
2104  SafetyInfo, ORE,
2105  Preheader->getTerminator())) {
2106  DereferenceableInPH = true;
2107  Alignment = std::max(Alignment, InstAlignment);
2108  }
2109  } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
2110  // Stores *of* the pointer are not interesting, only stores *to* the
2111  // pointer.
2112  if (UI->getOperand(1) != ASIV)
2113  continue;
2114  if (!Store->isUnordered())
2115  return false;
2116 
2117  SawUnorderedAtomic |= Store->isAtomic();
2118  SawNotAtomic |= !Store->isAtomic();
2119 
2120  // If the store is guaranteed to execute, both properties are satisfied.
2121  // We may want to check if a store is guaranteed to execute even if we
2122  // already know that promotion is safe, since it may have higher
2123  // alignment than any other guaranteed stores, in which case we can
2124  // raise the alignment on the promoted store.
2125  Align InstAlignment = Store->getAlign();
2126 
2127  if (!DereferenceableInPH || !SafeToInsertStore ||
2128  (InstAlignment > Alignment)) {
2129  if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
2130  DereferenceableInPH = true;
2131  SafeToInsertStore = true;
2132  Alignment = std::max(Alignment, InstAlignment);
2133  }
2134  }
2135 
2136  // If a store dominates all exit blocks, it is safe to sink.
2137  // As explained above, if an exit block was executed, a dominating
2138  // store must have been executed at least once, so we are not
2139  // introducing stores on paths that did not have them.
2140  // Note that this only looks at explicit exit blocks. If we ever
2141  // start sinking stores into unwind edges (see above), this will break.
2142  if (!SafeToInsertStore)
2143  SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2144  return DT->dominates(Store->getParent(), Exit);
2145  });
2146 
2147  // If the store is not guaranteed to execute, we may still get
2148  // deref info through it.
2149  if (!DereferenceableInPH) {
2150  DereferenceableInPH = isDereferenceableAndAlignedPointer(
2151  Store->getPointerOperand(), Store->getValueOperand()->getType(),
2152  Store->getAlign(), MDL, Preheader->getTerminator(), DT, TLI);
2153  }
2154  } else
2155  return false; // Not a load or store.
2156 
2157  // Merge the AA tags.
2158  if (LoopUses.empty()) {
2159  // On the first load/store, just take its AA tags.
2160  AATags = UI->getAAMetadata();
2161  } else if (AATags) {
2162  AATags = AATags.merge(UI->getAAMetadata());
2163  }
2164 
2165  LoopUses.push_back(UI);
2166  }
2167  }
2168 
2169  // If we found both an unordered atomic instruction and a non-atomic memory
2170  // access, bail. We can't blindly promote non-atomic to atomic since we
2171  // might not be able to lower the result. We can't downgrade since that
2172  // would violate memory model. Also, align 0 is an error for atomics.
2173  if (SawUnorderedAtomic && SawNotAtomic)
2174  return false;
2175 
2176  // If we're inserting an atomic load in the preheader, we must be able to
2177  // lower it. We're only guaranteed to be able to lower naturally aligned
2178  // atomics.
2179  auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
2180  if (SawUnorderedAtomic &&
2181  Alignment < MDL.getTypeStoreSize(SomePtrElemType))
2182  return false;
2183 
2184  // If we couldn't prove we can hoist the load, bail.
2185  if (!DereferenceableInPH)
2186  return false;
2187 
2188  // We know we can hoist the load, but don't have a guaranteed store.
2189  // Check whether the location is thread-local. If it is, then we can insert
2190  // stores along paths which originally didn't have them without violating the
2191  // memory model.
2192  if (!SafeToInsertStore) {
2193  if (IsKnownThreadLocalObject)
2194  SafeToInsertStore = true;
2195  else {
2196  Value *Object = getUnderlyingObject(SomePtr);
2197  SafeToInsertStore =
2198  (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
2199  isNotCapturedBeforeOrInLoop(Object, CurLoop, DT);
2200  }
2201  }
2202 
2203  // If we've still failed to prove we can sink the store, give up.
2204  if (!SafeToInsertStore)
2205  return false;
2206 
2207  // Otherwise, this is safe to promote, lets do it!
2208  LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
2209  << '\n');
2210  ORE->emit([&]() {
2211  return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2212  LoopUses[0])
2213  << "Moving accesses to memory location out of the loop";
2214  });
2215  ++NumPromoted;
2216 
2217  // Look at all the loop uses, and try to merge their locations.
2218  std::vector<const DILocation *> LoopUsesLocs;
2219  for (auto U : LoopUses)
2220  LoopUsesLocs.push_back(U->getDebugLoc().get());
2221  auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs));
2222 
2223  // We use the SSAUpdater interface to insert phi nodes as required.
2225  SSAUpdater SSA(&NewPHIs);
2226  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
2227  InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
2228  Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
2229 
2230  // Set up the preheader to have a definition of the value. It is the live-out
2231  // value from the preheader that uses in the loop will use.
2232  LoadInst *PreheaderLoad = new LoadInst(
2233  SomePtr->getType()->getPointerElementType(), SomePtr,
2234  SomePtr->getName() + ".promoted", Preheader->getTerminator());
2235  if (SawUnorderedAtomic)
2236  PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2237  PreheaderLoad->setAlignment(Alignment);
2238  PreheaderLoad->setDebugLoc(DebugLoc());
2239  if (AATags)
2240  PreheaderLoad->setAAMetadata(AATags);
2241  SSA.AddAvailableValue(Preheader, PreheaderLoad);
2242 
2243  MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
2244  PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2245  MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2246  MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
2247 
2248  if (VerifyMemorySSA)
2249  MSSAU->getMemorySSA()->verifyMemorySSA();
2250  // Rewrite all the loads in the loop and remember all the definitions from
2251  // stores in the loop.
2252  Promoter.run(LoopUses);
2253 
2254  if (VerifyMemorySSA)
2255  MSSAU->getMemorySSA()->verifyMemorySSA();
2256  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2257  if (PreheaderLoad->use_empty())
2258  eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
2259 
2260  return true;
2261 }
2262 
2263 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
2264  function_ref<void(Instruction *)> Fn) {
2265  for (const BasicBlock *BB : L->blocks())
2266  if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2267  for (const auto &Access : *Accesses)
2268  if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
2269  Fn(MUD->getMemoryInst());
2270 }
2271 
2274  AliasSetTracker AST(*AA);
2275 
2276  auto IsPotentiallyPromotable = [L](const Instruction *I) {
2277  if (const auto *SI = dyn_cast<StoreInst>(I))
2278  return L->isLoopInvariant(SI->getPointerOperand());
2279  if (const auto *LI = dyn_cast<LoadInst>(I))
2280  return L->isLoopInvariant(LI->getPointerOperand());
2281  return false;
2282  };
2283 
2284  // Populate AST with potentially promotable accesses and remove them from
2285  // MaybePromotable, so they will not be checked again on the next iteration.
2286  SmallPtrSet<Value *, 16> AttemptingPromotion;
2287  foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2288  if (IsPotentiallyPromotable(I)) {
2289  AttemptingPromotion.insert(I);
2290  AST.add(I);
2291  }
2292  });
2293 
2294  // We're only interested in must-alias sets that contain a mod.
2296  for (AliasSet &AS : AST)
2297  if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2298  Sets.push_back(&AS);
2299 
2300  if (Sets.empty())
2301  return {}; // Nothing to promote...
2302 
2303  // Discard any sets for which there is an aliasing non-promotable access.
2304  foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2305  if (AttemptingPromotion.contains(I))
2306  return;
2307 
2308  llvm::erase_if(Sets, [&](const AliasSet *AS) {
2309  return AS->aliasesUnknownInst(I, *AA);
2310  });
2311  });
2312 
2314  for (const AliasSet *Set : Sets) {
2315  SmallSetVector<Value *, 8> PointerMustAliases;
2316  for (const auto &ASI : *Set)
2317  PointerMustAliases.insert(ASI.getValue());
2318  Result.push_back(std::move(PointerMustAliases));
2319  }
2320 
2321  return Result;
2322 }
2323 
2325  AliasSetTracker *CurAST, Loop *CurLoop,
2326  AAResults *AA) {
2327  return CurAST->getAliasSetFor(MemLoc).isMod();
2328 }
2329 
2331  Loop *CurLoop, Instruction &I,
2332  SinkAndHoistLICMFlags &Flags) {
2333  // For hoisting, use the walker to determine safety
2334  if (!Flags.getIsSink()) {
2336  // See declaration of SetLicmMssaOptCap for usage details.
2337  if (Flags.tooManyClobberingCalls())
2338  Source = MU->getDefiningAccess();
2339  else {
2341  Flags.incrementClobberingCalls();
2342  }
2343  return !MSSA->isLiveOnEntryDef(Source) &&
2344  CurLoop->contains(Source->getBlock());
2345  }
2346 
2347  // For sinking, we'd need to check all Defs below this use. The getClobbering
2348  // call will look on the backedge of the loop, but will check aliasing with
2349  // the instructions on the previous iteration.
2350  // For example:
2351  // for (i ... )
2352  // load a[i] ( Use (LoE)
2353  // store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2354  // i++;
2355  // The load sees no clobbering inside the loop, as the backedge alias check
2356  // does phi translation, and will check aliasing against store a[i-1].
2357  // However sinking the load outside the loop, below the store is incorrect.
2358 
2359  // For now, only sink if there are no Defs in the loop, and the existing ones
2360  // precede the use and are in the same block.
2361  // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2362  // needs PostDominatorTreeAnalysis.
2363  // FIXME: More precise: no Defs that alias this Use.
2364  if (Flags.tooManyMemoryAccesses())
2365  return true;
2366  for (auto *BB : CurLoop->getBlocks())
2367  if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU))
2368  return true;
2369  // When sinking, the source block may not be part of the loop so check it.
2370  if (!CurLoop->contains(&I))
2371  return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU);
2372 
2373  return false;
2374 }
2375 
2377  MemoryUse &MU) {
2378  if (const auto *Accesses = MSSA.getBlockDefs(&BB))
2379  for (const auto &MA : *Accesses)
2380  if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2381  if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
2382  return true;
2383  return false;
2384 }
2385 
2386 /// Little predicate that returns true if the specified basic block is in
2387 /// a subloop of the current one, not the current one itself.
2388 ///
2389 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2390  assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2391  return LI->getLoopFor(BB) != CurLoop;
2392 }
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::LoopSafetyInfo::isGuaranteedToExecute
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const =0
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:730
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::MemorySSA::verifyMemorySSA
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1901
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
llvm::initializeLegacyLICMPassPass
void initializeLegacyLICMPassPass(PassRegistry &)
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
llvm::AliasSetTracker::end
const_iterator end() const
Definition: AliasSetTracker.h:415
llvm::PHINode::incoming_values
op_range incoming_values()
Definition: Instructions.h:2719
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
Metadata.h
HoistSinkColdnessThreshold
static cl::opt< unsigned > HoistSinkColdnessThreshold("licm-coldness-threshold", cl::Hidden, cl::init(4), cl::desc("Relative coldness Threshold of hoisting/sinking destination " "block for LICM to be considered beneficial"))
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4611
pointerInvalidatedByLoop
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AAResults *AA)
Definition: LICM.cpp:2324
llvm::isModOrRefSet
LLVM_NODISCARD bool isModOrRefSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:190
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
DebugInfoMetadata.h
llvm::LoadAndStorePromoter
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater.
Definition: SSAUpdater.h:136
Scalar.h
SetOperations.h
llvm::succ_end
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
llvm::ScalarEvolution::forgetLoopDispositions
void forgetLoopDispositions(const Loop *L)
Called when the client has changed the disposition of values in this loop.
Definition: ScalarEvolution.cpp:7608
llvm::hoistRegion
bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, Loop *, MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block,...
Definition: LICM.cpp:856
llvm::TypeSize::getFixedSize
ScalarTy getFixedSize() const
Definition: TypeSize.h:426
Loads.h
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:62
llvm::promoteLoopAccessesToScalars
bool promoteLoopAccessesToScalars(const SmallSetVector< Value *, 8 > &, SmallVectorImpl< BasicBlock * > &, SmallVectorImpl< Instruction * > &, SmallVectorImpl< MemoryAccess * > &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *)
Try to promote memory values to scalars by sinking stores out of the loop and moving loads to before ...
Definition: LICM.cpp:1988
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::PredIteratorCache
PredIteratorCache - This class is an extremely trivial cache for predecessor iterator queries.
Definition: PredIteratorCache.h:27
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
isDereferenceableAndAlignedPointer
static bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT, const TargetLibraryInfo *TLI, SmallPtrSetImpl< const Value * > &Visited, unsigned MaxDepth)
Test if V is always a pointer to allocated and suitably aligned memory for a simple load or store.
Definition: Loads.cpp:44
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::insert
bool insert(const T &X)
Insert a new element into the PriorityWorklist.
Definition: PriorityWorklist.h:91
llvm::SetLicmMssaNoAccForPromotionCap
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:690
Statistic.h
llvm::AliasSet::isMod
bool isMod() const
Definition: AliasSetTracker.h:205
CaptureTracking.h
llvm::DataLayout::getTypeStoreSize
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:471
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::sinkRegionForLoopNest
bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *)
Call sinkRegion on loops contained within the specified loop in order from innermost to outermost.
Definition: LICM.cpp:576
LazyBlockFrequencyInfo.h
llvm::BasicBlock::replaceSuccessorsPhiUsesWith
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:461
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1732
isLoadInvariantInLoop
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, Loop *CurLoop)
Definition: LICM.cpp:1039
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::MemorySSA::dominates
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
Definition: MemorySSA.cpp:2154
llvm::SinkAndHoistLICMFlags::tooManyClobberingCalls
bool tooManyClobberingCalls()
Definition: LoopUtils.h:131
ValueTracking.h
Local.h
OptimizationRemarkEmitter.h
llvm::isAllocLikeFn
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc,...
Definition: MemoryBuiltins.cpp:305
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:149
GlobalsModRef.h
llvm::AliasSetTracker
Definition: AliasSetTracker.h:329
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
worthSinkOrHoistInst
static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock, OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI)
Definition: LICM.cpp:825
llvm::AAMDNodes
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:651
llvm::LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
Definition: LazyBlockFrequencyInfo.cpp:62
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
ScalarEvolution.h
MaxNumUsesTraversed
static cl::opt< uint32_t > MaxNumUsesTraversed("licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)"))
MemoryBuiltins.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:333
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::BasicBlock::getSingleSuccessor
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:298
llvm::StoreInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:357
PIC
PassInstrumentationCallbacks PIC
Definition: PassBuilderBindings.cpp:55
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
llvm::hasDisableLICMTransformsHint
bool hasDisableLICMTransformsHint(const Loop *L)
Look for the loop attribute that disables the LICM transformation heuristics.
Definition: LoopUtils.cpp:356
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LICM.cpp:91
llvm::TinyPtrVector::front
EltTy front() const
Definition: TinyPtrVector.h:230
ControlFlowHoisting
static cl::opt< bool > ControlFlowHoisting("licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM"))
llvm::Value::hasOneUser
bool hasOneUser() const
Return true if there is exactly one user of this value.
Definition: Value.cpp:157
llvm::formLCSSARecursively
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:403
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::SinkAndHoistLICMFlags::LicmMssaNoAccForPromotionCap
unsigned LicmMssaNoAccForPromotionCap
Definition: LoopUtils.h:138
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
llvm::MemoryUse
Represents read-only accesses to memory.
Definition: MemorySSA.h:320
llvm::SinkAndHoistLICMFlags::incrementClobberingCalls
void incrementClobberingCalls()
Definition: LoopUtils.h:132
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:105
BasicAliasAnalysis.h
ConstantFolding.h
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:299
MustExecute.h
llvm::MemorySSAUpdater::createMemoryAccessInBB
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
Definition: MemorySSAUpdater.cpp:1433
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::LazyBlockFrequencyInfoPass
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
Definition: LazyBlockFrequencyInfo.h:100
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
licm
licm
Definition: LICM.cpp:324
llvm::LoopBlocksRPO
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
llvm::isGuard
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
CommandLine.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::remove
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::MemorySSAUpdater::insertUse
void insertUse(MemoryUse *Use, bool RenameUses=false)
Definition: MemorySSAUpdater.cpp:245
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1551
llvm::StoreInst::setOrdering
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:368
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:981
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::begin
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
collectPromotionCandidates
static SmallVector< SmallSetVector< Value *, 8 >, 0 > collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L)
Definition: LICM.cpp:2273
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
inSubLoop
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI)
Little predicate that returns true if the specified basic block is in a subloop of the current one,...
Definition: LICM.cpp:2389
llvm::AAResults
Definition: AliasAnalysis.h:508
llvm::LoopSafetyInfo
Captures loop safety information.
Definition: MustExecute.h:60
llvm::MemorySSA::isLiveOnEntryDef
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:742
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
foreachMemoryAccess
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L, function_ref< void(Instruction *)> Fn)
Definition: LICM.cpp:2263
llvm::PredIteratorCache::size
size_t size(BasicBlock *BB) const
Definition: PredIteratorCache.h:65
llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags
SinkAndHoistLICMFlags(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink, Loop *L=nullptr, MemorySSA *MSSA=nullptr)
Definition: LICM.cpp:338
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::SinkAndHoistLICMFlags
Flags controlling how much is checked when sinking or hoisting instructions.
Definition: LoopUtils.h:118
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: PriorityWorklist.h:154
llvm::AliasSet
Definition: AliasSetTracker.h:49
llvm::ReplaceInstWithInst
void ReplaceInstWithInst(BasicBlock::InstListType &BIL, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
Definition: BasicBlockUtils.cpp:468
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::LoopSafetyInfo::getBlockColors
const DenseMap< BasicBlock *, ColorVector > & getBlockColors() const
Returns block colors map that is used to update funclet operand bundles.
Definition: MustExecute.cpp:35
TargetLibraryInfo.h
AssumeBundleBuilder.h
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:253
false
Definition: StackSlotColoring.cpp:142
llvm::MemorySSAUpdater::moveToPlace
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
Definition: MemorySSAUpdater.cpp:1204
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
cloneInstructionInExitBlock
static Instruction * cloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1457
llvm::MemorySSA::getBlockAccesses
const AccessList * getBlockAccesses(const BasicBlock *BB) const
Return the list of MemoryAccess's for a given basic block.
Definition: MemorySSA.h:762
LoopUtils.h
llvm::getUnderlyingObject
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
Definition: ValueTracking.cpp:4378
llvm::LPPassManager
Definition: LoopPass.h:75
llvm::Instruction::getAAMetadata
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1370
llvm::SplitBlockPredecessors
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
Definition: BasicBlockUtils.cpp:1148
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:148
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
isSafeToExecuteUnconditionally
static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI=nullptr)
Only sink or hoist an instruction if it is not a trapping instruction, or if the instruction is known...
Definition: LICM.cpp:1822
PatternMatch.h
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:216
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2725
llvm::MemoryAccess::getBlock
BasicBlock * getBlock() const
Definition: MemorySSA.h:159
LoopIterator.h
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MemorySSA::locallyDominates
bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in the same basic block, determine whether MemoryAccess A dominates MemoryA...
Definition: MemorySSA.cpp:2123
llvm::OperandBundleUse::getTagID
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
Definition: InstrTypes.h:1084
CFG.h
LoopInfo.h
pointerInvalidatedByLoopWithMSSA
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, Loop *CurLoop, Instruction &I, SinkAndHoistLICMFlags &Flags)
Definition: LICM.cpp:2330
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3149
llvm::sinkRegion
bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, TargetTransformInfo *, Loop *CurLoop, MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, Loop *OutermostLoop=nullptr)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block,...
Definition: LICM.cpp:507
llvm::OutputFileType::Object
@ Object
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
isTriviallyReplaceablePHI
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I)
Returns true if a PHINode is a trivially replaceable with an Instruction.
Definition: LICM.cpp:1376
llvm::MemorySSAWalker::getClobberingMemoryAccess
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1041
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
llvm::cl::opt< bool >
SSA
Memory SSA
Definition: MemorySSA.cpp:73
llvm::LoadInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:273
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::AliasSetTracker::add
void add(Value *Ptr, LocationSize Size, const AAMDNodes &AAInfo)
These methods are used to add different types of instructions to the alias sets.
Definition: AliasSetTracker.cpp:397
llvm::Value::getNameOrAsOperand
std::string getNameOrAsOperand() const
Definition: Value.cpp:443
LICM
loop versioning Loop Versioning For LICM
Definition: LoopVersioningLICM.cpp:658
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:465
uint64_t
llvm::LoopPass
Definition: LoopPass.h:27
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:56
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:263
llvm::MemorySSAUpdater::getMemorySSA
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Definition: MemorySSAUpdater.h:244
eraseInstruction
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1529
llvm::LoadInst::setOrdering
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:237
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
llvm::LoopBase::getUniqueExitBlocks
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:122
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:91
llvm::PHINode::setIncomingBlock
void setIncomingBlock(unsigned i, BasicBlock *BB)
Definition: Instructions.h:2768
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::SmallPriorityWorklist
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
Definition: PriorityWorklist.h:256
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2783
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::ICFLoopSafetyInfo::anyBlockMayThrow
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:77
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:705
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:967
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:722
PredIteratorCache.h
llvm::succ_begin
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:928
llvm::LoadInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:227
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:576
llvm::PHINode::getBasicBlockIndex
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
Definition: Instructions.h:2811
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:402
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:367
llvm::ScalarEvolution::forgetValue
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
Definition: ScalarEvolution.cpp:7583
llvm::ICFLoopSafetyInfo::doesNotWriteMemoryBefore
bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop) const
Returns true if we could not execute a memory-modifying instruction before we enter BB under assumpti...
Definition: MustExecute.cpp:277
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::empty
bool empty() const
Determine if the PriorityWorklist is empty or not.
Definition: PriorityWorklist.h:68
splitPredecessorsOfLoopExit
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, LoopInfo *LI, const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1590
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::ModRefInfo
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition: AliasAnalysis.h:149
llvm::LoopBase::hasDedicatedExits
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
Definition: LoopInfoImpl.h:92
llvm::canSinkOrHoistInst
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, SinkAndHoistLICMFlags *LICMFlags=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Returns true if is legal to hoist or sink this instruction disregarding the possible introduction of ...
Definition: LICM.cpp:1158
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::LNICMPass::run
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LICM.cpp:287
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::AliasSetTracker::getAliasSetFor
AliasSet & getAliasSetFor(const MemoryLocation &MemLoc)
Return the alias set which contains the specified memory location.
Definition: AliasSetTracker.cpp:346
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:328
isFreeInLoop
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI)
Return true if the instruction is free in the loop.
Definition: LICM.cpp:1385
llvm::salvageKnowledge
void salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
Definition: AssumeBundleBuilder.cpp:292
llvm::FMRB_DoesNotAccessMemory
@ FMRB_DoesNotAccessMemory
This function does not perform any non-local loads or stores to memory.
Definition: AliasAnalysis.h:269
LoopPassManager.h
llvm::LoopNest::getOutermostLoop
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Definition: LoopNestAnalysis.h:80
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:931
llvm::collectChildrenInLoop
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:458
hasProfileData
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
Definition: PartialInlining.cpp:717
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::SinkAndHoistLICMFlags::NoOfMemAccTooLarge
bool NoOfMemAccTooLarge
Definition: LoopUtils.h:135
llvm::MemorySSAUpdater::createMemoryAccessAfter
MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
Definition: MemorySSAUpdater.cpp:1451
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::AAMDNodes::merge
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
Definition: TypeBasedAliasAnalysis.cpp:524
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::MemorySSAUpdater::insertDef
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
Definition: MemorySSAUpdater.cpp:314
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1558
DataLayout.h
llvm::TinyPtrVector::size
unsigned size() const
Definition: TinyPtrVector.h:172
llvm::MemorySSA::getSkipSelfWalker
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1614
LoopPass.h
Motion
Loop Invariant Code Motion
Definition: LICM.cpp:324
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::MemorySSA::getBlockDefs
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:770
llvm::pred_size
unsigned pred_size(const BasicBlock *BB)
Get the number of predecessors of BB.
Definition: CFG.h:124
llvm::appendLoopsToWorklist
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1478
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:168
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:219
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::ms_demangle::IntrinsicFunctionKind::New
@ New
llvm::OperandBundleUse
A lightweight accessor for an operand bundle meant to be passed around by value.
Definition: InstrTypes.h:1056
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:990
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ICFLoopSafetyInfo::insertInstructionTo
void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB)
Inform the safety info that we are planning to insert a new instruction Inst into the basic block BB.
Definition: MustExecute.cpp:95
llvm::FunctionModRefBehavior
FunctionModRefBehavior
Summary of how a function affects memory in the program.
Definition: AliasAnalysis.h:263
llvm::DominatorTreeBase::properlyDominates
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Definition: GenericDomTree.h:392
llvm::LoopStandardAnalysisResults::BFI
BlockFrequencyInfo * BFI
Definition: LoopAnalysisManager.h:60
llvm::LoopNest::getParent
Function * getParent() const
Return the function to which the loop-nest belongs.
Definition: LoopNestAnalysis.h:153
SSAUpdater.h
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:421
llvm::Loop::hasLoopInvariantOperands
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:70
llvm::MemoryAccess
Definition: MemorySSA.h:137
llvm::PredIterator
Definition: CFG.h:43
llvm::DomTreeNodeBase< BasicBlock >
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::AAResults::getModRefBehavior
FunctionModRefBehavior getModRefBehavior(const CallBase *Call)
Return the behavior of the given call site.
Definition: AliasAnalysis.cpp:423
llvm::DominatorTreeBase::addNewBlock
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Definition: GenericDomTree.h:619
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:92
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1578
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
llvm::ICFLoopSafetyInfo::removeInstruction
void removeInstruction(const Instruction *Inst)
Inform safety info that we are planning to remove the instruction Inst from its block.
Definition: MustExecute.cpp:101
std
Definition: BitVector.h:838
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:461
llvm::Value::user_iterator
user_iterator_impl< User > user_iterator
Definition: Value.h:390
llvm::MemoryUseOrDef::getDefiningAccess
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:257
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:324
llvm::ICFLoopSafetyInfo
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:132
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2675
llvm::MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor
void wireOldPredecessorsToNewImmediatePredecessor(BasicBlock *Old, BasicBlock *New, ArrayRef< BasicBlock * > Preds, bool IdenticalEdgesWereMerged=true)
A new empty BasicBlock (New) now branches directly to Old.
Definition: MemorySSAUpdater.cpp:1272
llvm::pred_end
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:112
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:972
llvm::TypeSize
Definition: TypeSize.h:417
llvm::Instruction::setAAMetadata
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1379
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::salvageDebugInfo
void salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1726
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
llvm::Instruction::isEHPad
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:661
llvm::MemoryUseOrDef
Class that has the common methods + fields of memory uses/defs.
Definition: MemorySSA.h:247
llvm::Function::hasProfileData
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:303
ScalarEvolutionAliasAnalysis.h
llvm::LoopSafetyInfo::copyColors
void copyColors(BasicBlock *New, BasicBlock *Old)
Copy colors of block Old into the block New.
Definition: MustExecute.cpp:39
Unordered
QP Compare Ordered Unordered
Definition: README_P9.txt:299
llvm::AAResults::pointsToConstantMemory
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal=false)
Checks whether the given location points to constant memory, or if OrLocal is true whether it points ...
Definition: AliasAnalysis.cpp:163
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
GuardUtils.h
pointerInvalidatedByBlockWithMSSA
static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU)
Definition: LICM.cpp:2376
llvm::AliasSetTracker::begin
const_iterator begin() const
Definition: AliasSetTracker.h:414
DisablePromotion
static cl::opt< bool > DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), cl::desc("Disable memory promotion in LICM pass"))
Memory promotion is enabled by default.
hoist
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
When an instruction is found to only use loop invariant operands that is safe to hoist,...
Definition: LICM.cpp:1777
llvm::pred_begin
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:109
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::createLICMPass
Pass * createLICMPass()
Definition: LICM.cpp:327
MemorySSA.h
Instructions.h
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::LICMPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LICM.cpp:263
llvm::ConstantInt::isNegative
bool isNegative() const
Definition: Constants.h:189
llvm::BasicBlock::moveBefore
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:137
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:242
llvm::LoadAndStorePromoter::doExtraRewritesBeforeFinalDeletion
virtual void doExtraRewritesBeforeFinalDeletion()
This hook is invoked after all the stores are found and inserted as available values.
Definition: SSAUpdater.h:161
Dominators.h
llvm::Type::getPointerElementType
Type * getPointerElementType() const
Definition: Type.h:369
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1328
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
N
#define N
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::ICFLoopSafetyInfo::computeLoopSafetyInfo
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:81
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2749
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_END(LegacyLICMPass
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
Users
iv Induction Variable Users
Definition: IVUsers.cpp:52
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:62
llvm::PHINode
Definition: Instructions.h:2633
LICM.h
llvm::set_intersect
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:39
llvm::SmallVectorImpl< BasicBlock * >
llvm::PredIteratorCache::get
ArrayRef< BasicBlock * > get(BasicBlock *BB)
Definition: PredIteratorCache.h:66
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
isNotUsedOrFreeInLoop
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop, bool LoopNestMode)
Return true if the only users of this instruction are outside of the loop.
Definition: LICM.cpp:1415
DerivedTypes.h
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AliasSet::getUniqueInstruction
Instruction * getUniqueInstruction()
If this alias set is known to contain a single instruction and only a single unique instruction,...
Definition: AliasSetTracker.cpp:261
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
AliasSetTracker.h
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::ICFLoopSafetyInfo::isGuaranteedToExecute
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
Definition: MustExecute.cpp:270
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
LLVMContext.h
llvm::SinkAndHoistLICMFlags::tooManyMemoryAccesses
bool tooManyMemoryAccesses()
Definition: LoopUtils.h:130
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::LoadAndStorePromoter::instructionDeleted
virtual void instructionDeleted(Instruction *I) const
Called before each instruction is deleted.
Definition: SSAUpdater.h:168
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
moveInstructionBefore
static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Definition: LICM.cpp:1537
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3068
llvm::ConstantFoldInstruction
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
Definition: ConstantFolding.cpp:1093
raw_ostream.h
llvm::LoopBlocksRPO::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
llvm::TinyPtrVector
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
llvm::SSAUpdater
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:38
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:388
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
BasicBlockUtils.h
llvm::SetLicmMssaOptCap
cl::opt< unsigned > SetLicmMssaOptCap
llvm::LazyBranchProbabilityInfoPass
This is an alternative analysis pass to BranchProbabilityInfoWrapperPass.
Definition: LazyBranchProbabilityInfo.h:50
llvm::LoopStandardAnalysisResults::TLI
TargetLibraryInfo & TLI
Definition: LoopAnalysisManager.h:58
InitializePasses.h
llvm::PointerMayBeCapturedBefore
bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, const DominatorTree *DT, bool IncludeI=false, unsigned MaxUsesToExplore=0, const LoopInfo *LI=nullptr)
PointerMayBeCapturedBefore - Return true if this pointer value may be captured by the enclosing funct...
Definition: CaptureTracking.cpp:245
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::AAResults::getModRefInfo
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
Definition: AliasAnalysis.cpp:218
sink
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition: LICM.cpp:1662
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3147
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3161
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:209
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1243
llvm::MemorySSAUpdater::removeMemoryAccess
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
Definition: MemorySSAUpdater.cpp:1309
llvm::LoopInfo::wouldBeOutOfLoopUseRequiringLCSSA
bool wouldBeOutOfLoopUseRequiringLCSSA(const Value *V, const BasicBlock *ExitBB) const
Definition: LoopInfo.cpp:935
canSplitPredecessors
static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo)
Definition: LICM.cpp:1571
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::SinkAndHoistLICMFlags::getIsSink
bool getIsSink()
Definition: LoopUtils.h:129
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
sinkThroughTriviallyReplaceablePHI
static Instruction * sinkThroughTriviallyReplaceablePHI(PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap< BasicBlock *, Instruction *, 32 > &SunkCopies, const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1553
llvm::LoadAndStorePromoter::isInstInList
virtual bool isInstInList(Instruction *I, const SmallVectorImpl< Instruction * > &Insts) const
Return true if the specified instruction is in the Inst list.
Definition: SSAUpdater.cpp:476