// Doxygen page header residue (LLVM 15.0.0git, LICM.cpp, "Go to the
// documentation of this file.") — not part of the original source; kept only
// as a comment so the file remains well-formed.
1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible. It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe. This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // Hoisting operations out of loops is a canonicalization transform. It
16 // enables and simplifies subsequent optimizations in the middle-end.
17 // Rematerialization of hoisted instructions to reduce register pressure is the
18 // responsibility of the back-end, which has more accurate information about
19 // register pressure and also handles other optimizations than LICM that
20 // increase live-ranges.
21 //
22 // This pass uses alias analysis for two purposes:
23 //
24 // 1. Moving loop invariant loads and calls out of loops. If we can determine
25 // that a load or call inside of a loop never aliases anything stored to,
26 // we can hoist it or sink it like any other instruction.
27 // 2. Scalar Promotion of Memory - If there is a store instruction inside of
28 // the loop, we try to move the store to happen AFTER the loop instead of
29 // inside of the loop. This can only happen if a few conditions are true:
30 // A. The pointer stored through is loop invariant
31 // B. There are no stores or loads in the loop which _may_ alias the
32 // pointer. There are no calls in the loop which mod/ref the pointer.
33 // If these conditions are true, we can promote the loads and stores in the
34 // loop of the pointer to use a temporary alloca'd variable. We then use
35 // the SSAUpdater to construct the appropriate SSA form for the value.
36 //
37 //===----------------------------------------------------------------------===//
38 
41 #include "llvm/ADT/SetOperations.h"
42 #include "llvm/ADT/Statistic.h"
49 #include "llvm/Analysis/Loads.h"
50 #include "llvm/Analysis/LoopInfo.h"
53 #include "llvm/Analysis/LoopPass.h"
62 #include "llvm/IR/CFG.h"
63 #include "llvm/IR/Constants.h"
64 #include "llvm/IR/DataLayout.h"
66 #include "llvm/IR/DerivedTypes.h"
67 #include "llvm/IR/Dominators.h"
68 #include "llvm/IR/Instructions.h"
69 #include "llvm/IR/IntrinsicInst.h"
70 #include "llvm/IR/LLVMContext.h"
71 #include "llvm/IR/Metadata.h"
72 #include "llvm/IR/PatternMatch.h"
74 #include "llvm/InitializePasses.h"
76 #include "llvm/Support/Debug.h"
78 #include "llvm/Transforms/Scalar.h"
84 #include <algorithm>
85 #include <utility>
86 using namespace llvm;
87 
88 namespace llvm {
89 class BlockFrequencyInfo;
90 class LPMUpdater;
91 } // namespace llvm
92 
93 #define DEBUG_TYPE "licm"
94 
95 STATISTIC(NumCreatedBlocks, "Number of blocks created");
96 STATISTIC(NumClonedBranches, "Number of branches cloned");
97 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
98 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
99 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
100 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
101 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
102 
103 /// Memory promotion is enabled by default.
104 static cl::opt<bool>
105  DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
106  cl::desc("Disable memory promotion in LICM pass"));
107 
109  "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
110  cl::desc("Enable control flow (and PHI) hoisting in LICM"));
111 
113  "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
114  cl::desc("Max num uses visited for identifying load "
115  "invariance in loop using invariant start (default = 8)"));
116 
117 // Experimental option to allow imprecision in LICM in pathological cases, in
118 // exchange for faster compile. This is to be removed if MemorySSA starts to
119 // address the same issue. LICM calls MemorySSAWalker's
120 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
121 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
122 // which may not be precise, since optimizeUses is capped. The result is
123 // correct, but we may not get as "far up" as possible to get which access is
124 // clobbering the one queried.
126  "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
127  cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
128  "for faster compile. Caps the MemorySSA clobbering calls."));
129 
130 // Experimentally, memory promotion carries less importance than sinking and
131 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
132 // compile time.
134  "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
135  cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
136  "effect. When MSSA in LICM is enabled, then this is the maximum "
137  "number of accesses allowed to be present in a loop in order to "
138  "enable memory promotion."));
139 
140 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
141 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
142  const LoopSafetyInfo *SafetyInfo,
143  TargetTransformInfo *TTI, bool &FreeInLoop,
144  bool LoopNestMode);
145 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
146  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
147  MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
149 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
150  BlockFrequencyInfo *BFI, const Loop *CurLoop,
151  ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU,
154  Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
155  const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
156  OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
157  bool AllowSpeculation);
158 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
159  Loop *CurLoop, Instruction &I,
160  SinkAndHoistLICMFlags &Flags);
162  MemoryUse &MU);
164  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
165  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
166 
167 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
168  MemorySSAUpdater &MSSAU);
169 
170 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
171  ICFLoopSafetyInfo &SafetyInfo,
172  MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
173 
174 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
175  function_ref<void(Instruction *)> Fn);
178 
179 namespace {
180 struct LoopInvariantCodeMotion {
181  bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
184  OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
185 
186  LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
187  unsigned LicmMssaNoAccForPromotionCap,
188  bool LicmAllowSpeculation)
189  : LicmMssaOptCap(LicmMssaOptCap),
190  LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
191  LicmAllowSpeculation(LicmAllowSpeculation) {}
192 
193 private:
194  unsigned LicmMssaOptCap;
195  unsigned LicmMssaNoAccForPromotionCap;
196  bool LicmAllowSpeculation;
197 };
198 
199 struct LegacyLICMPass : public LoopPass {
200  static char ID; // Pass identification, replacement for typeid
201  LegacyLICMPass(
202  unsigned LicmMssaOptCap = SetLicmMssaOptCap,
203  unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
204  bool LicmAllowSpeculation = true)
205  : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
206  LicmAllowSpeculation) {
208  }
209 
210  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
211  if (skipLoop(L))
212  return false;
213 
214  LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
215  << L->getHeader()->getNameOrAsOperand() << "\n");
216 
217  auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
218  MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
221  hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
222  : nullptr;
223  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
224  // pass. Function analyses need to be preserved across loop transformations
225  // but ORE cannot be preserved (see comment before the pass definition).
227  return LICM.runOnLoop(
228  L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
229  &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
230  &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI,
231  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
232  *L->getHeader()->getParent()),
233  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
234  *L->getHeader()->getParent()),
235  SE ? &SE->getSE() : nullptr, MSSA, &ORE);
236  }
237 
238  /// This transformation requires natural loop information & requires that
239  /// loop preheaders be inserted into the CFG...
240  ///
241  void getAnalysisUsage(AnalysisUsage &AU) const override {
252  }
253 
254 private:
255  LoopInvariantCodeMotion LICM;
256 };
257 } // namespace
258 
261  if (!AR.MSSA)
262  report_fatal_error("LICM requires MemorySSA (loop-mssa)");
263 
264  // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
265  // pass. Function analyses need to be preserved across loop transformations
266  // but ORE cannot be preserved (see comment before the pass definition).
268 
269  LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
270  Opts.AllowSpeculation);
271  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
272  &AR.SE, AR.MSSA, &ORE))
273  return PreservedAnalyses::all();
274 
275  auto PA = getLoopPassPreservedAnalyses();
276 
277  PA.preserve<DominatorTreeAnalysis>();
278  PA.preserve<LoopAnalysis>();
279  PA.preserve<MemorySSAAnalysis>();
280 
281  return PA;
282 }
283 
285  raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
286  static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline(
287  OS, MapClassName2PassName);
288 
289  OS << "<";
290  OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
291  OS << ">";
292 }
293 
296  LPMUpdater &) {
297  if (!AR.MSSA)
298  report_fatal_error("LNICM requires MemorySSA (loop-mssa)");
299 
300  // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
301  // pass. Function analyses need to be preserved across loop transformations
302  // but ORE cannot be preserved (see comment before the pass definition).
304 
305  LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
306  Opts.AllowSpeculation);
307 
308  Loop &OutermostLoop = LN.getOutermostLoop();
309  bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
310  &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true);
311 
312  if (!Changed)
313  return PreservedAnalyses::all();
314 
315  auto PA = getLoopPassPreservedAnalyses();
316 
317  PA.preserve<DominatorTreeAnalysis>();
318  PA.preserve<LoopAnalysis>();
319  PA.preserve<MemorySSAAnalysis>();
320 
321  return PA;
322 }
323 
325  raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
326  static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline(
327  OS, MapClassName2PassName);
328 
329  OS << "<";
330  OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
331  OS << ">";
332 }
333 
334 char LegacyLICMPass::ID = 0;
335 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
336  false, false)
341 INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
342 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
343  false)
344 
345 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
346 Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
347  unsigned LicmMssaNoAccForPromotionCap,
348  bool LicmAllowSpeculation) {
349  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
350  LicmAllowSpeculation);
351 }
352 
354  MemorySSA *MSSA)
356  IsSink, L, MSSA) {}
357 
359  unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
360  Loop *L, MemorySSA *MSSA)
361  : LicmMssaOptCap(LicmMssaOptCap),
362  LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
363  IsSink(IsSink) {
364  assert(((L != nullptr) == (MSSA != nullptr)) &&
365  "Unexpected values for SinkAndHoistLICMFlags");
366  if (!MSSA)
367  return;
368 
369  unsigned AccessCapCount = 0;
370  for (auto *BB : L->getBlocks())
371  if (const auto *Accesses = MSSA->getBlockAccesses(BB))
372  for (const auto &MA : *Accesses) {
373  (void)MA;
374  ++AccessCapCount;
375  if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
376  NoOfMemAccTooLarge = true;
377  return;
378  }
379  }
380 }
381 
382 /// Hoist expressions out of the specified loop. Note, alias info for inner
383 /// loop is not preserved so it is not a good idea to run LICM multiple
384 /// times on one loop.
385 bool LoopInvariantCodeMotion::runOnLoop(
386  Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
389  bool LoopNestMode) {
390  bool Changed = false;
391 
392  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
393  MSSA->ensureOptimizedUses();
394 
395  // If this loop has metadata indicating that LICM is not to be performed then
396  // just exit.
398  return false;
399  }
400 
401  // Don't sink stores from loops with coroutine suspend instructions.
402  // LICM would sink instructions into the default destination of
403  // the coroutine switch. The default destination of the switch is to
404  // handle the case where the coroutine is suspended, by which point the
405  // coroutine frame may have been destroyed. No instruction can be sunk there.
406  // FIXME: This would unfortunately hurt the performance of coroutines, however
407  // there is currently no general solution for this. Similar issues could also
408  // potentially happen in other passes where instructions are being moved
409  // across that edge.
410  bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
411  return llvm::any_of(*BB, [](Instruction &I) {
412  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
413  return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
414  });
415  });
416 
417  MemorySSAUpdater MSSAU(MSSA);
418  SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
419  /*IsSink=*/true, L, MSSA);
420 
421  // Get the preheader block to move instructions into...
422  BasicBlock *Preheader = L->getLoopPreheader();
423 
424  // Compute loop safety information.
425  ICFLoopSafetyInfo SafetyInfo;
426  SafetyInfo.computeLoopSafetyInfo(L);
427 
428  // We want to visit all of the instructions in this loop... that are not parts
429  // of our subloops (they have already had their invariants hoisted out of
430  // their loop, into this loop, so there is no need to process the BODIES of
431  // the subloops).
432  //
433  // Traverse the body of the loop in depth first order on the dominator tree so
434  // that we are guaranteed to see definitions before we see uses. This allows
435  // us to sink instructions in one pass, without iteration. After sinking
436  // instructions, we perform another pass to hoist them out of the loop.
437  if (L->hasDedicatedExits())
438  Changed |= LoopNestMode
439  ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI,
440  DT, BFI, TLI, TTI, L, MSSAU,
441  &SafetyInfo, Flags, ORE)
442  : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI,
443  TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE);
444  Flags.setIsSink(false);
445  if (Preheader)
446  Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
447  MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
448  LicmAllowSpeculation);
449 
450  // Now that all loop invariants have been removed from the loop, promote any
451  // memory references to scalars that we can.
452  // Don't sink stores from loops without dedicated block exits. Exits
453  // containing indirect branches are not transformed by loop simplify,
454  // make sure we catch that. An additional load may be generated in the
455  // preheader for SSA updater, so also avoid sinking when no preheader
456  // is available.
457  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
458  !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
459  // Figure out the loop exits and their insertion points
460  SmallVector<BasicBlock *, 8> ExitBlocks;
461  L->getUniqueExitBlocks(ExitBlocks);
462 
463  // We can't insert into a catchswitch.
464  bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
465  return isa<CatchSwitchInst>(Exit->getTerminator());
466  });
467 
468  if (!HasCatchSwitch) {
470  SmallVector<MemoryAccess *, 8> MSSAInsertPts;
471  InsertPts.reserve(ExitBlocks.size());
472  MSSAInsertPts.reserve(ExitBlocks.size());
473  for (BasicBlock *ExitBlock : ExitBlocks) {
474  InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
475  MSSAInsertPts.push_back(nullptr);
476  }
477 
479 
480  // Promoting one set of accesses may make the pointers for another set
481  // loop invariant, so run this in a loop.
482  bool Promoted = false;
483  bool LocalPromoted;
484  do {
485  LocalPromoted = false;
486  for (const SmallSetVector<Value *, 8> &PointerMustAliases :
487  collectPromotionCandidates(MSSA, AA, L)) {
488  LocalPromoted |= promoteLoopAccessesToScalars(
489  PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
490  DT, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
491  }
492  Promoted |= LocalPromoted;
493  } while (LocalPromoted);
494 
495  // Once we have promoted values across the loop body we have to
496  // recursively reform LCSSA as any nested loop may now have values defined
497  // within the loop used in the outer loop.
498  // FIXME: This is really heavy handed. It would be a bit better to use an
499  // SSAUpdater strategy during promotion that was LCSSA aware and reformed
500  // it as it went.
501  if (Promoted)
502  formLCSSARecursively(*L, *DT, LI, SE);
503 
504  Changed |= Promoted;
505  }
506  }
507 
508  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
509  // specifically moving instructions across the loop boundary and so it is
510  // especially in need of basic functional correctness checking here.
511  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
512  assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
513  "Parent loop not left in LCSSA form after LICM!");
514 
515  if (VerifyMemorySSA)
516  MSSA->verifyMemorySSA();
517 
518  if (Changed && SE)
519  SE->forgetLoopDispositions(L);
520  return Changed;
521 }
522 
523 /// Walk the specified region of the CFG (defined by all blocks dominated by
524 /// the specified block, and that are in the current loop) in reverse depth
525 /// first order w.r.t the DominatorTree. This allows us to visit uses before
526 /// definitions, allowing us to sink a loop body in one pass without iteration.
527 ///
531  Loop *CurLoop, MemorySSAUpdater &MSSAU,
532  ICFLoopSafetyInfo *SafetyInfo,
533  SinkAndHoistLICMFlags &Flags,
534  OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
535 
536  // Verify inputs.
537  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
538  CurLoop != nullptr && SafetyInfo != nullptr &&
539  "Unexpected input to sinkRegion.");
540 
541  // We want to visit children before parents. We will enque all the parents
542  // before their children in the worklist and process the worklist in reverse
543  // order.
545 
546  bool Changed = false;
547  for (DomTreeNode *DTN : reverse(Worklist)) {
548  BasicBlock *BB = DTN->getBlock();
549  // Only need to process the contents of this block if it is not part of a
550  // subloop (which would already have been processed).
551  if (inSubLoop(BB, CurLoop, LI))
552  continue;
553 
554  for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
555  Instruction &I = *--II;
556 
557  // The instruction is not used in the loop if it is dead. In this case,
558  // we just delete it instead of sinking it.
559  if (isInstructionTriviallyDead(&I, TLI)) {
560  LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
563  ++II;
564  eraseInstruction(I, *SafetyInfo, MSSAU);
565  Changed = true;
566  continue;
567  }
568 
569  // Check to see if we can sink this instruction to the exit blocks
570  // of the loop. We can do this if the all users of the instruction are
571  // outside of the loop. In this case, it doesn't even matter if the
572  // operands of the instruction are loop invariant.
573  //
574  bool FreeInLoop = false;
575  bool LoopNestMode = OutermostLoop != nullptr;
576  if (!I.mayHaveSideEffects() &&
577  isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
578  SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
579  canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
580  if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
581  if (!FreeInLoop) {
582  ++II;
584  eraseInstruction(I, *SafetyInfo, MSSAU);
585  }
586  Changed = true;
587  }
588  }
589  }
590  }
591  if (VerifyMemorySSA)
592  MSSAU.getMemorySSA()->verifyMemorySSA();
593  return Changed;
594 }
595 
599  Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
601 
602  bool Changed = false;
604  Worklist.insert(CurLoop);
605  appendLoopsToWorklist(*CurLoop, Worklist);
606  while (!Worklist.empty()) {
607  Loop *L = Worklist.pop_back_val();
608  Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI,
609  TTI, L, MSSAU, SafetyInfo, Flags, ORE, CurLoop);
610  }
611  return Changed;
612 }
613 
614 namespace {
615 // This is a helper class for hoistRegion to make it able to hoist control flow
616 // in order to be able to hoist phis. The way this works is that we initially
617 // start hoisting to the loop preheader, and when we see a loop invariant branch
618 // we make note of this. When we then come to hoist an instruction that's
619 // conditional on such a branch we duplicate the branch and the relevant control
620 // flow, then hoist the instruction into the block corresponding to its original
621 // block in the duplicated control flow.
class ControlFlowHoister {
private:
  // Information about the loop we are hoisting from
  LoopInfo *LI;
  DominatorTree *DT;
  Loop *CurLoop;
  MemorySSAUpdater &MSSAU;

  // A map of blocks in the loop to the block their instructions will be hoisted
  // to.
  DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;

  // The branches that we can hoist, mapped to the block that marks a
  // convergence point of their control flow.
  DenseMap<BranchInst *, BasicBlock *> HoistableBranches;

public:
  // Stores the analyses by pointer/reference; the caller keeps ownership and
  // must keep them alive for the lifetime of this object.
  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
                     MemorySSAUpdater &MSSAU)
      : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
642 
643  void registerPossiblyHoistableBranch(BranchInst *BI) {
644  // We can only hoist conditional branches with loop invariant operands.
645  if (!ControlFlowHoisting || !BI->isConditional() ||
646  !CurLoop->hasLoopInvariantOperands(BI))
647  return;
648 
649  // The branch destinations need to be in the loop, and we don't gain
650  // anything by duplicating conditional branches with duplicate successors,
651  // as it's essentially the same as an unconditional branch.
652  BasicBlock *TrueDest = BI->getSuccessor(0);
653  BasicBlock *FalseDest = BI->getSuccessor(1);
654  if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
655  TrueDest == FalseDest)
656  return;
657 
658  // We can hoist BI if one branch destination is the successor of the other,
659  // or both have common successor which we check by seeing if the
660  // intersection of their successors is non-empty.
661  // TODO: This could be expanded to allowing branches where both ends
662  // eventually converge to a single block.
663  SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
664  TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
665  FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
666  BasicBlock *CommonSucc = nullptr;
667  if (TrueDestSucc.count(FalseDest)) {
668  CommonSucc = FalseDest;
669  } else if (FalseDestSucc.count(TrueDest)) {
670  CommonSucc = TrueDest;
671  } else {
672  set_intersect(TrueDestSucc, FalseDestSucc);
673  // If there's one common successor use that.
674  if (TrueDestSucc.size() == 1)
675  CommonSucc = *TrueDestSucc.begin();
676  // If there's more than one pick whichever appears first in the block list
677  // (we can't use the value returned by TrueDestSucc.begin() as it's
678  // unpredicatable which element gets returned).
679  else if (!TrueDestSucc.empty()) {
680  Function *F = TrueDest->getParent();
681  auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
682  auto It = llvm::find_if(*F, IsSucc);
683  assert(It != F->end() && "Could not find successor in function");
684  CommonSucc = &*It;
685  }
686  }
687  // The common successor has to be dominated by the branch, as otherwise
688  // there will be some other path to the successor that will not be
689  // controlled by this branch so any phi we hoist would be controlled by the
690  // wrong condition. This also takes care of avoiding hoisting of loop back
691  // edges.
692  // TODO: In some cases this could be relaxed if the successor is dominated
693  // by another block that's been hoisted and we can guarantee that the
694  // control flow has been replicated exactly.
695  if (CommonSucc && DT->dominates(BI, CommonSucc))
696  HoistableBranches[BI] = CommonSucc;
697  }
698 
699  bool canHoistPHI(PHINode *PN) {
700  // The phi must have loop invariant operands.
701  if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
702  return false;
703  // We can hoist phis if the block they are in is the target of hoistable
704  // branches which cover all of the predecessors of the block.
705  SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
706  BasicBlock *BB = PN->getParent();
707  for (BasicBlock *PredBB : predecessors(BB))
708  PredecessorBlocks.insert(PredBB);
709  // If we have less predecessor blocks than predecessors then the phi will
710  // have more than one incoming value for the same block which we can't
711  // handle.
712  // TODO: This could be handled be erasing some of the duplicate incoming
713  // values.
714  if (PredecessorBlocks.size() != pred_size(BB))
715  return false;
716  for (auto &Pair : HoistableBranches) {
717  if (Pair.second == BB) {
718  // Which blocks are predecessors via this branch depends on if the
719  // branch is triangle-like or diamond-like.
720  if (Pair.first->getSuccessor(0) == BB) {
721  PredecessorBlocks.erase(Pair.first->getParent());
722  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
723  } else if (Pair.first->getSuccessor(1) == BB) {
724  PredecessorBlocks.erase(Pair.first->getParent());
725  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
726  } else {
727  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
728  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
729  }
730  }
731  }
732  // PredecessorBlocks will now be empty if for every predecessor of BB we
733  // found a hoistable branch source.
734  return PredecessorBlocks.empty();
735  }
736 
737  BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
738  if (!ControlFlowHoisting)
739  return CurLoop->getLoopPreheader();
740  // If BB has already been hoisted, return that
741  if (HoistDestinationMap.count(BB))
742  return HoistDestinationMap[BB];
743 
744  // Check if this block is conditional based on a pending branch
745  auto HasBBAsSuccessor =
747  return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
748  Pair.first->getSuccessor(1) == BB);
749  };
750  auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
751 
752  // If not involved in a pending branch, hoist to preheader
753  BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
754  if (It == HoistableBranches.end()) {
755  LLVM_DEBUG(dbgs() << "LICM using "
756  << InitialPreheader->getNameOrAsOperand()
757  << " as hoist destination for "
758  << BB->getNameOrAsOperand() << "\n");
759  HoistDestinationMap[BB] = InitialPreheader;
760  return InitialPreheader;
761  }
762  BranchInst *BI = It->first;
763  assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
764  HoistableBranches.end() &&
765  "BB is expected to be the target of at most one branch");
766 
767  LLVMContext &C = BB->getContext();
768  BasicBlock *TrueDest = BI->getSuccessor(0);
769  BasicBlock *FalseDest = BI->getSuccessor(1);
770  BasicBlock *CommonSucc = HoistableBranches[BI];
771  BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
772 
773  // Create hoisted versions of blocks that currently don't have them
774  auto CreateHoistedBlock = [&](BasicBlock *Orig) {
775  if (HoistDestinationMap.count(Orig))
776  return HoistDestinationMap[Orig];
777  BasicBlock *New =
778  BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
779  HoistDestinationMap[Orig] = New;
780  DT->addNewBlock(New, HoistTarget);
781  if (CurLoop->getParentLoop())
782  CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
783  ++NumCreatedBlocks;
784  LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
785  << " as hoist destination for " << Orig->getName()
786  << "\n");
787  return New;
788  };
789  BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
790  BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
791  BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
792 
793  // Link up these blocks with branches.
794  if (!HoistCommonSucc->getTerminator()) {
795  // The new common successor we've generated will branch to whatever that
796  // hoist target branched to.
797  BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
798  assert(TargetSucc && "Expected hoist target to have a single successor");
799  HoistCommonSucc->moveBefore(TargetSucc);
800  BranchInst::Create(TargetSucc, HoistCommonSucc);
801  }
802  if (!HoistTrueDest->getTerminator()) {
803  HoistTrueDest->moveBefore(HoistCommonSucc);
804  BranchInst::Create(HoistCommonSucc, HoistTrueDest);
805  }
806  if (!HoistFalseDest->getTerminator()) {
807  HoistFalseDest->moveBefore(HoistCommonSucc);
808  BranchInst::Create(HoistCommonSucc, HoistFalseDest);
809  }
810 
811  // If BI is being cloned to what was originally the preheader then
812  // HoistCommonSucc will now be the new preheader.
813  if (HoistTarget == InitialPreheader) {
814  // Phis in the loop header now need to use the new preheader.
815  InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
817  HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
818  // The new preheader dominates the loop header.
819  DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
820  DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
821  DT->changeImmediateDominator(HeaderNode, PreheaderNode);
822  // The preheader hoist destination is now the new preheader, with the
823  // exception of the hoist destination of this branch.
824  for (auto &Pair : HoistDestinationMap)
825  if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
826  Pair.second = HoistCommonSucc;
827  }
828 
829  // Now finally clone BI.
831  HoistTarget->getTerminator(),
832  BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
833  ++NumClonedBranches;
834 
835  assert(CurLoop->getLoopPreheader() &&
836  "Hoisting blocks should not have destroyed preheader");
837  return HoistDestinationMap[BB];
838  }
839 };
840 } // namespace
841 
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
/// Returns true if anything was hoisted, folded, or otherwise changed.
// NOTE(review): this copy of the file appears truncated — the opening of the
// signature, the inner instruction loop, and the `ConstantFoldInstruction(`
// and `isSafeToExecuteUnconditionally(` call openers are missing below;
// verify against the upstream file before building.
                       TargetLibraryInfo *TLI, Loop *CurLoop,
                       MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
                       ICFLoopSafetyInfo *SafetyInfo,
                       SinkAndHoistLICMFlags &Flags,
                       OptimizationRemarkEmitter *ORE, bool LoopNestMode,
                       bool AllowSpeculation) {
  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && SafetyInfo != nullptr &&
         "Unexpected input to hoistRegion.");

  // Tracks hoist destinations and clones conditional branches so that
  // conditionally-executed code can still be hoisted (see ControlFlowHoister).
  ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);

  // Keep track of instructions that have been hoisted, as they may need to be
  // re-hoisted if they end up not dominating all of their uses.
  SmallVector<Instruction *, 16> HoistedInstructions;

  // For PHI hoisting to work we need to hoist blocks before their successors.
  // We can do this by iterating through the blocks in the loop in reverse
  // post-order.
  LoopBlocksRPO Worklist(CurLoop);
  Worklist.perform(LI);
  bool Changed = false;
  for (BasicBlock *BB : Worklist) {
    // Only need to process the contents of this block if it is not part of a
    // subloop (which would already have been processed).
    if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
      continue;

      // Try constant folding this instruction. If all the operands are
      // constants, it is technically hoistable, but it would be better to
      // just fold it.
          &I, I.getModule()->getDataLayout(), TLI)) {
        LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
                          << '\n');
        // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
        I.replaceAllUsesWith(C);
        if (isInstructionTriviallyDead(&I, TLI))
          eraseInstruction(I, *SafetyInfo, MSSAU);
        Changed = true;
        continue;
      }

      // Try hoisting the instruction out to the preheader. We can only do
      // this if all of the operands of the instruction are loop invariant and
      // if it is safe to hoist the instruction. We also check block frequency
      // to make sure instruction only gets hoisted into colder blocks.
      // TODO: It may be safe to hoist if we are hoisting to a conditional block
      // and we have accurately duplicated the control flow from the loop header
      // to that block.
      if (CurLoop->hasLoopInvariantOperands(&I) &&
          canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) &&
              I, DT, TLI, CurLoop, SafetyInfo, ORE,
              CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) {
        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
              MSSAU, SE, ORE);
        HoistedInstructions.push_back(&I);
        Changed = true;
        continue;
      }

      // Attempt to remove floating point division out of the loop by
      // converting it to a reciprocal multiplication.
      if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
          CurLoop->isLoopInvariant(I.getOperand(1))) {
        // Build (1.0 / divisor) — hoistable since the divisor is invariant —
        // and a multiply by it that stays in the loop replacing the fdiv.
        auto Divisor = I.getOperand(1);
        auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
        auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
        ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
        SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
        ReciprocalDivisor->insertBefore(&I);

        auto Product =
            BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
        Product->setFastMathFlags(I.getFastMathFlags());
        SafetyInfo->insertInstructionTo(Product, I.getParent());
        Product->insertAfter(&I);
        I.replaceAllUsesWith(Product);
        eraseInstruction(I, *SafetyInfo, MSSAU);

        hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
              SafetyInfo, MSSAU, SE, ORE);
        HoistedInstructions.push_back(ReciprocalDivisor);
        Changed = true;
        continue;
      }

      auto IsInvariantStart = [&](Instruction &I) {
        using namespace PatternMatch;
        return I.use_empty() &&
               match(&I, m_Intrinsic<Intrinsic::invariant_start>());
      };
      auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
        return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
               SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
      };
      // invariant.start intrinsics and guards can be hoisted when their
      // operands are invariant and nothing may write memory before them.
      if ((IsInvariantStart(I) || isGuard(&I)) &&
          CurLoop->hasLoopInvariantOperands(&I) &&
          MustExecuteWithoutWritesBefore(I)) {
        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
              MSSAU, SE, ORE);
        HoistedInstructions.push_back(&I);
        Changed = true;
        continue;
      }

      if (PHINode *PN = dyn_cast<PHINode>(&I)) {
        if (CFH.canHoistPHI(PN)) {
          // Redirect incoming blocks first to ensure that we create hoisted
          // versions of those blocks before we hoist the phi.
          for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
            PN->setIncomingBlock(
                i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
          hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
                MSSAU, SE, ORE);
          assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
          Changed = true;
          continue;
        }
      }

      // Remember possibly hoistable branches so we can actually hoist them
      // later if needed.
      if (BranchInst *BI = dyn_cast<BranchInst>(&I))
        CFH.registerPossiblyHoistableBranch(BI);
    }
  }

  // If we hoisted instructions to a conditional block they may not dominate
  // their uses that weren't hoisted (such as phis where some operands are not
  // loop invariant). If so make them unconditional by moving them to their
  // immediate dominator. We iterate through the instructions in reverse order
  // which ensures that when we rehoist an instruction we rehoist its operands,
  // and also keep track of where in the block we are rehoisting to to make sure
  // that we rehoist instructions before the instructions that use them.
  Instruction *HoistPoint = nullptr;
  if (ControlFlowHoisting) {
    for (Instruction *I : reverse(HoistedInstructions)) {
      if (!llvm::all_of(I->uses(),
                        [&](Use &U) { return DT->dominates(I, U); })) {
        BasicBlock *Dominator =
            DT->getNode(I->getParent())->getIDom()->getBlock();
        if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
          if (HoistPoint)
            assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
                   "New hoist point expected to dominate old hoist point");
          HoistPoint = Dominator->getTerminator();
        }
        LLVM_DEBUG(dbgs() << "LICM rehoisting to "
                          << HoistPoint->getParent()->getNameOrAsOperand()
                          << ": " << *I << "\n");
        moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
        HoistPoint = I;
        Changed = true;
      }
    }
  }
  // Optional self-check of the MemorySSA update.
  if (VerifyMemorySSA)
    MSSAU.getMemorySSA()->verifyMemorySSA();

  // Now that we've finished hoisting make sure that LI and DT are still
  // valid.
#ifdef EXPENSIVE_CHECKS
  if (Changed) {
           "Dominator tree verification failed");
    LI->verify(*DT);
  }
#endif

  return Changed;
}
1024 
// Return true if LI is invariant within scope of the loop. LI is invariant if
// CurLoop is dominated by an invariant.start representing the same memory
// location and size as the memory location LI loads from, and also the
// invariant.start has no uses.
// NOTE(review): the first line of the signature is missing in this copy of
// the file; verify against the upstream file before building.
                                  Loop *CurLoop) {
  Value *Addr = LI->getOperand(0);
  const DataLayout &DL = LI->getModule()->getDataLayout();
  const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());

  // It is not currently possible for clang to generate an invariant.start
  // intrinsic with scalable vector types because we don't support thread local
  // sizeless types and we don't permit sizeless types in structs or classes.
  // Furthermore, even if support is added for this in future the intrinsic
  // itself is defined to have a size of -1 for variable sized objects. This
  // makes it impossible to verify if the intrinsic envelops our region of
  // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
  // types would have a -1 parameter, but the former is clearly double the size
  // of the latter.
  if (LocSizeInBits.isScalable())
    return false;

  // if the type is i8 addrspace(x)*, we know this is the type of
  // llvm.invariant.start operand
  auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
                                     LI->getPointerAddressSpace());
  unsigned BitcastsVisited = 0;
  // Look through bitcasts until we reach the i8* type (this is invariant.start
  // operand type).
  while (Addr->getType() != PtrInt8Ty) {
    auto *BC = dyn_cast<BitCastInst>(Addr);
    // Avoid traversing high number of bitcast uses.
    if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
      return false;
    Addr = BC->getOperand(0);
  }
  // If we've ended up at a global/constant, bail. We shouldn't be looking at
  // uselists for non-local Values in a loop pass.
  if (isa<Constant>(Addr))
    return false;

  unsigned UsesVisited = 0;
  // Traverse all uses of the load operand value, to see if invariant.start is
  // one of the uses, and whether it dominates the load instruction.
  for (auto *U : Addr->users()) {
    // Avoid traversing for Load operand with high number of users.
    if (++UsesVisited > MaxNumUsesTraversed)
      return false;
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    // If there are escaping uses of invariant.start instruction, the load maybe
    // non-invariant.
    if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
        !II->use_empty())
      continue;
    ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
    // The intrinsic supports having a -1 argument for variable sized objects
    // so we should check for that here.
    if (InvariantSize->isNegative())
      continue;
    // invariant.start sizes are in bytes; convert to bits for comparison.
    uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
    // Confirm the invariant.start location size contains the load operand size
    // in bits. Also, the invariant.start should dominate the load, and we
    // should not hoist the load out of a loop that contains this dominating
    // invariant.start.
    if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits &&
        DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
      return true;
  }

  return false;
}
1096 
1097 namespace {
1098 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1099 /// sink a given instruction out of a loop. Does not address legality
1100 /// concerns such as aliasing or speculation safety.
1101 bool isHoistableAndSinkableInst(Instruction &I) {
1102  // Only these instructions are hoistable/sinkable.
1103  return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
1104  isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
1105  isa<BinaryOperator>(I) || isa<SelectInst>(I) ||
1106  isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
1107  isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
1108  isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
1109  isa<InsertValueInst>(I) || isa<FreezeInst>(I));
1110 }
1111 /// Return true if MSSA knows there are no MemoryDefs in the loop.
1112 bool isReadOnly(const MemorySSAUpdater &MSSAU, const Loop *L) {
1113  for (auto *BB : L->getBlocks())
1114  if (MSSAU.getMemorySSA()->getBlockDefs(BB))
1115  return false;
1116  return true;
1117 }
1118 
1119 /// Return true if I is the only Instruction with a MemoryAccess in L.
1120 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1121  const MemorySSAUpdater &MSSAU) {
1122  for (auto *BB : L->getBlocks())
1123  if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
1124  int NotAPhi = 0;
1125  for (const auto &Acc : *Accs) {
1126  if (isa<MemoryPhi>(&Acc))
1127  continue;
1128  const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1129  if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1130  return false;
1131  }
1132  }
1133  return true;
1134 }
1135 }
1136 
/// Decide whether it is legal (mechanically and alias-wise) to hoist or sink
/// instruction I out of CurLoop. Fault safety is the caller's responsibility.
// NOTE(review): the opening line(s) of the signature, a
// `pointerInvalidatedByLoop(` call opener, and the definition of `Source`
// near the end are missing in this copy of the file; verify against the
// upstream file before building.
                              Loop *CurLoop, MemorySSAUpdater &MSSAU,
                              bool TargetExecutesOncePerLoop,
                              SinkAndHoistLICMFlags &Flags,
  // If we don't understand the instruction, bail early.
  if (!isHoistableAndSinkableInst(I))
    return false;

  MemorySSA *MSSA = MSSAU.getMemorySSA();
  // Loads have extra constraints we have to verify before we can hoist them.
  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
    if (!LI->isUnordered())
      return false; // Don't sink/hoist volatile or ordered atomic loads!

    // Loads from constant memory are always safe to move, even if they end up
    // in the same alias set as something that ends up being modified.
    if (AA->pointsToConstantMemory(LI->getOperand(0)))
      return true;
    if (LI->hasMetadata(LLVMContext::MD_invariant_load))
      return true;

    if (LI->isAtomic() && !TargetExecutesOncePerLoop)
      return false; // Don't risk duplicating unordered loads

    // This checks for an invariant.start dominating the load.
    if (isLoadInvariantInLoop(LI, DT, CurLoop))
      return true;

    bool Invalidated = pointerInvalidatedByLoop(
        MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, Flags);
    // Check loop-invariant address because this may also be a sinkable load
    // whose address is not necessarily loop-invariant.
    if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
      ORE->emit([&]() {
        return OptimizationRemarkMissed(
                   DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
               << "failed to move load with loop-invariant address "
                  "because the loop may invalidate its value";
      });

    return !Invalidated;
  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
    // Don't sink or hoist dbg info; it's legal, but not useful.
    if (isa<DbgInfoIntrinsic>(I))
      return false;

    // Don't sink calls which can throw.
    if (CI->mayThrow())
      return false;

    // Convergent attribute has been used on operations that involve
    // inter-thread communication which results are implicitly affected by the
    // enclosing control flows. It is not safe to hoist or sink such operations
    // across control flow.
    if (CI->isConvergent())
      return false;

    using namespace PatternMatch;
    if (match(CI, m_Intrinsic<Intrinsic::assume>()))
      // Assumes don't actually alias anything or throw
      return true;

    if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
      // Widenable conditions don't actually alias anything or throw
      return true;

    // Handle simple cases by querying alias analysis.
    FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
    if (Behavior == FMRB_DoesNotAccessMemory)
      return true;
    if (AAResults::onlyReadsMemory(Behavior)) {
      // A readonly argmemonly function only reads from memory pointed to by
      // it's arguments with arbitrary offsets. If we can prove there are no
      // writes to this memory in the loop, we can hoist or sink.
      if (AAResults::onlyAccessesArgPointees(Behavior)) {
        // TODO: expand to writeable arguments
        for (Value *Op : CI->args())
          if (Op->getType()->isPointerTy() &&
                  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
                  Flags))
            return false;
        return true;
      }

      // If this call only reads from memory and there are no writes to memory
      // in the loop, we can hoist or sink the call as appropriate.
      if (isReadOnly(MSSAU, CurLoop))
        return true;
    }

    // FIXME: This should use mod/ref information to see if we can hoist or
    // sink the call.

    return false;
  } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
    // Fences alias (most) everything to provide ordering. For the moment,
    // just give up if there are any other memory operations in the loop.
    return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
  } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
    if (!SI->isUnordered())
      return false; // Don't sink/hoist volatile or ordered atomic store!

    // We can only hoist a store that we can prove writes a value which is not
    // read or overwritten within the loop. For those cases, we fallback to
    // load store promotion instead. TODO: We can extend this to cases where
    // there is exactly one write to the location and that write dominates an
    // arbitrary number of reads in the loop.
    if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
      return true;
    // If there are more accesses than the Promotion cap or no "quota" to
    // check clobber, then give up as we're not walking a list that long.
    if (Flags.tooManyMemoryAccesses() || Flags.tooManyClobberingCalls())
      return false;
    // If there are interfering Uses (i.e. their defining access is in the
    // loop), or ordered loads (stored as Defs!), don't move this store.
    // Could do better here, but this is conservatively correct.
    // TODO: Cache set of Uses on the first walk in runOnLoop, update when
    // moving accesses. Can also extend to dominating uses.
    auto *SIMD = MSSA->getMemoryAccess(SI);
    for (auto *BB : CurLoop->getBlocks())
      if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
        for (const auto &MA : *Accesses)
          if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
            auto *MD = MU->getDefiningAccess();
            if (!MSSA->isLiveOnEntryDef(MD) &&
                CurLoop->contains(MD->getBlock()))
              return false;
            // Disable hoisting past potentially interfering loads. Optimized
            // Uses may point to an access outside the loop, as getClobbering
            // checks the previous iteration when walking the backedge.
            // FIXME: More precise: no Uses that alias SI.
            if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
              return false;
          } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
            // A load appearing as a Def is an ordered/atomic load.
            if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
              (void)LI; // Silence warning.
              assert(!LI->isUnordered() && "Expected unordered load");
              return false;
            }
            // Any call, while it may not be clobbering SI, it may be a use.
            if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
              // Check if the call may read from the memory location written
              // to by SI. Check CI's attributes and arguments; the number of
              // such checks performed is limited above by NoOfMemAccTooLarge.
              ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
              if (isModOrRefSet(MRI))
                return false;
            }
          }
      }
    Flags.incrementClobberingCalls();
    // If there are no clobbering Defs in the loop, store is safe to hoist.
    return MSSA->isLiveOnEntryDef(Source) ||
           !CurLoop->contains(Source->getBlock());
  }

  assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");

  // We've established mechanical ability and aliasing, it's up to the caller
  // to check fault safety
  return true;
}
1302 
1303 /// Returns true if a PHINode is a trivially replaceable with an
1304 /// Instruction.
1305 /// This is true when all incoming values are that instruction.
1306 /// This pattern occurs most often with LCSSA PHI nodes.
1307 ///
1308 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1309  for (const Value *IncValue : PN.incoming_values())
1310  if (IncValue != &I)
1311  return false;
1312 
1313  return true;
1314 }
1315 
1316 /// Return true if the instruction is free in the loop.
1317 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1318  const TargetTransformInfo *TTI) {
1319 
1320  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1321  if (TTI->getUserCost(GEP, TargetTransformInfo::TCK_SizeAndLatency) !=
1322  TargetTransformInfo::TCC_Free)
1323  return false;
1324  // For a GEP, we cannot simply use getUserCost because currently it
1325  // optimistically assumes that a GEP will fold into addressing mode
1326  // regardless of its users.
1327  const BasicBlock *BB = GEP->getParent();
1328  for (const User *U : GEP->users()) {
1329  const Instruction *UI = cast<Instruction>(U);
1330  if (CurLoop->contains(UI) &&
1331  (BB != UI->getParent() ||
1332  (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1333  return false;
1334  }
1335  return true;
1336  } else
1337  return TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
1338  TargetTransformInfo::TCC_Free;
1339 }
1340 
1341 /// Return true if the only users of this instruction are outside of
1342 /// the loop. If this is true, we can sink the instruction to the exit
1343 /// blocks of the loop.
1344 ///
1345 /// We also return true if the instruction could be folded away in lowering.
1346 /// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1347 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1348  const LoopSafetyInfo *SafetyInfo,
1349  TargetTransformInfo *TTI, bool &FreeInLoop,
1350  bool LoopNestMode) {
1351  const auto &BlockColors = SafetyInfo->getBlockColors();
1352  bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1353  for (const User *U : I.users()) {
1354  const Instruction *UI = cast<Instruction>(U);
1355  if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1356  const BasicBlock *BB = PN->getParent();
1357  // We cannot sink uses in catchswitches.
1358  if (isa<CatchSwitchInst>(BB->getTerminator()))
1359  return false;
1360 
1361  // We need to sink a callsite to a unique funclet. Avoid sinking if the
1362  // phi use is too muddled.
1363  if (isa<CallInst>(I))
1364  if (!BlockColors.empty() &&
1365  BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1366  return false;
1367 
1368  if (LoopNestMode) {
1369  while (isa<PHINode>(UI) && UI->hasOneUser() &&
1370  UI->getNumOperands() == 1) {
1371  if (!CurLoop->contains(UI))
1372  break;
1373  UI = cast<Instruction>(UI->user_back());
1374  }
1375  }
1376  }
1377 
1378  if (CurLoop->contains(UI)) {
1379  if (IsFree) {
1380  FreeInLoop = true;
1381  continue;
1382  }
1383  return false;
1384  }
1385  }
1386  return true;
1387 }
1388 
/// Clone I into ExitBlock (at its first insertion point) so that the LCSSA
/// phi PN can be replaced by the clone; returns the clone. Call-sites get a
/// funclet bundle appropriate for the exit block, and the clone gets a
/// MemoryAccess and LCSSA phis for any in-loop operands.
// NOTE(review): the opening of the signature and the declaration of
// `OpBundles` are missing in this copy of the file; verify against the
// upstream file before building.
    Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
    const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
  Instruction *New;
  if (auto *CI = dyn_cast<CallInst>(&I)) {
    const auto &BlockColors = SafetyInfo->getBlockColors();

    // Sinking call-sites need to be handled differently from other
    // instructions. The cloned call-site needs a funclet bundle operand
    // appropriate for its location in the CFG.
    for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
         BundleIdx != BundleEnd; ++BundleIdx) {
      OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
      // Drop the original funclet bundle; the right one for the exit block
      // is re-attached below.
      if (Bundle.getTagID() == LLVMContext::OB_funclet)
        continue;

      OpBundles.emplace_back(Bundle);
    }

    if (!BlockColors.empty()) {
      const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
      assert(CV.size() == 1 && "non-unique color for exit block!");
      BasicBlock *BBColor = CV.front();
      Instruction *EHPad = BBColor->getFirstNonPHI();
      if (EHPad->isEHPad())
        OpBundles.emplace_back("funclet", EHPad);
    }

    New = CallInst::Create(CI, OpBundles);
  } else {
    New = I.clone();
  }

  ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
  if (!I.getName().empty())
    New->setName(I.getName() + ".le");

  if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
    // Create a new MemoryAccess and let MemorySSA set its defining access.
    MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
        New, nullptr, New->getParent(), MemorySSA::Beginning);
    if (NewMemAcc) {
      if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
        MSSAU.insertDef(MemDef, /*RenameUses=*/true);
      else {
        auto *MemUse = cast<MemoryUse>(NewMemAcc);
        MSSAU.insertUse(MemUse, /*RenameUses=*/true);
      }
    }
  }

  // Build LCSSA PHI nodes for any in-loop operands (if legal). Note that
  // this is particularly cheap because we can rip off the PHI node that we're
  // replacing for the number and blocks of the predecessors.
  // OPT: If this shows up in a profile, we can instead finish sinking all
  // invariant instructions, and then walk their operands to re-establish
  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
  // sinking bottom-up.
  for (Use &Op : New->operands())
    if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
      auto *OInst = cast<Instruction>(Op.get());
      PHINode *OpPN =
          PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
                          OInst->getName() + ".lcssa", &ExitBlock.front());
      for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
        OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
      Op = OpPN;
    }
  return New;
}
1460 
/// Erase I, first detaching its MemorySSA access and its LoopSafetyInfo
/// bookkeeping so both stay consistent.
// NOTE(review): the first line of the signature is missing in this copy of
// the file; verify against the upstream file before building.
                             MemorySSAUpdater &MSSAU) {
  MSSAU.removeMemoryAccess(&I);
  SafetyInfo.removeInstruction(&I);
  I.eraseFromParent();
}
1467 
/// Move I immediately before Dest, keeping LoopSafetyInfo, MemorySSA and
/// (optionally) ScalarEvolution in sync with the move.
// NOTE(review): the first line of the signature is missing in this copy of
// the file; verify against the upstream file before building.
                                  ICFLoopSafetyInfo &SafetyInfo,
                                  MemorySSAUpdater &MSSAU,
                                  ScalarEvolution *SE) {
  SafetyInfo.removeInstruction(&I);
  SafetyInfo.insertInstructionTo(&I, Dest.getParent());
  I.moveBefore(&Dest);
  // Move the MemorySSA access (if any) along with the instruction.
  if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
          MSSAU.getMemorySSA()->getMemoryAccess(&I)))
    MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::BeforeTerminator);
  // SCEV's cached facts about I may no longer hold in the new position.
  if (SE)
    SE->forgetValue(&I);
}
1481 
/// Return (creating at most one per exit block, memoized in SunkCopies) the
/// clone of I that replaces the trivially replaceable phi TPN.
// NOTE(review): the opening of the signature and the first line of the
// assert are missing in this copy of the file; verify against the upstream
// file before building.
    PHINode *TPN, Instruction *I, LoopInfo *LI,
    const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
    MemorySSAUpdater &MSSAU) {
         "Expect only trivially replaceable PHI");
  BasicBlock *ExitBlock = TPN->getParent();
  Instruction *New;
  // Reuse an existing clone for this exit block if one was already made.
  auto It = SunkCopies.find(ExitBlock);
  if (It != SunkCopies.end())
    New = It->second;
  else
    New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock(
        *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
  return New;
}
1499 
1500 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1501  BasicBlock *BB = PN->getParent();
1502  if (!BB->canSplitPredecessors())
1503  return false;
1504  // It's not impossible to split EHPad blocks, but if BlockColors already exist
1505  // it require updating BlockColors for all offspring blocks accordingly. By
1506  // skipping such corner case, we can make updating BlockColors after splitting
1507  // predecessor fairly simple.
1508  if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1509  return false;
1510  for (BasicBlock *BBPred : predecessors(BB)) {
1511  if (isa<IndirectBrInst>(BBPred->getTerminator()) ||
1512  isa<CallBrInst>(BBPred->getTerminator()))
1513  return false;
1514  }
1515  return true;
1516 }
1517 
/// Split every in-loop predecessor of PN's exit block that feeds PN, so that
/// each split block holds a trivially replaceable single-incoming phi.
// NOTE(review): the opening of the signature and the
// `SplitBlockPredecessors(` call line defining `NewPred` are missing in this
// copy of the file; verify against the upstream file before building.
                                        LoopInfo *LI, const Loop *CurLoop,
                                        LoopSafetyInfo *SafetyInfo,
                                        MemorySSAUpdater *MSSAU) {
#ifndef NDEBUG
  SmallVector<BasicBlock *, 32> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
                                             ExitBlocks.end());
#endif
  BasicBlock *ExitBB = PN->getParent();
  assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");

  // Split predecessors of the loop exit to make instructions in the loop are
  // exposed to exit blocks through trivially replaceable PHIs while keeping the
  // loop in the canonical form where each predecessor of each exit block should
  // be contained within the loop. For example, this will convert the loop below
  // from
  //
  // LB1:
  //   %v1 =
  //   br %LE, %LB2
  // LB2:
  //   %v2 =
  //   br %LE, %LB1
  // LE:
  //   %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
  //
  // to
  //
  // LB1:
  //   %v1 =
  //   br %LE.split, %LB2
  // LB2:
  //   %v2 =
  //   br %LE.split2, %LB1
  // LE.split:
  //   %p1 = phi [%v1, %LB1] <-- trivially replaceable
  //   br %LE
  // LE.split2:
  //   %p2 = phi [%v2, %LB2] <-- trivially replaceable
  //   br %LE
  // LE:
  //   %p = phi [%p1, %LE.split], [%p2, %LE.split2]
  //
  const auto &BlockColors = SafetyInfo->getBlockColors();
  SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
  while (!PredBBs.empty()) {
    BasicBlock *PredBB = *PredBBs.begin();
    assert(CurLoop->contains(PredBB) &&
           "Expect all predecessors are in the loop");
    // Only split predecessors that actually feed PN.
    if (PN->getBasicBlockIndex(PredBB) >= 0) {
          ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
      // Since we do not allow splitting EH-block with BlockColors in
      // canSplitPredecessors(), we can simply assign predecessor's color to
      // the new block.
      if (!BlockColors.empty())
        // Grab a reference to the ColorVector to be inserted before getting the
        // reference to the vector we are copying because inserting the new
        // element in BlockColors might cause the map to be reallocated.
        SafetyInfo->copyColors(NewPred, PredBB);
    }
    PredBBs.remove(PredBB);
  }
}
1584 
1585 /// When an instruction is found to only be used outside of the loop, this
1586 /// function moves it to the exit blocks and patches up SSA form as needed.
1587 /// This method is guaranteed to remove the original instruction from its
1588 /// position, and may either delete it or move it to outside of the loop.
1589 ///
1590 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1591  BlockFrequencyInfo *BFI, const Loop *CurLoop,
1592  ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU,
1594  bool Changed = false;
1595  LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1596 
1597  // Iterate over users to be ready for actual sinking. Replace users via
1598  // unreachable blocks with undef and make all user PHIs trivially replaceable.
1599  SmallPtrSet<Instruction *, 8> VisitedUsers;
1600  for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1601  auto *User = cast<Instruction>(*UI);
1602  Use &U = UI.getUse();
1603  ++UI;
1604 
1605  if (VisitedUsers.count(User) || CurLoop->contains(User))
1606  continue;
1607 
1608  if (!DT->isReachableFromEntry(User->getParent())) {
1609  U = UndefValue::get(I.getType());
1610  Changed = true;
1611  continue;
1612  }
1613 
1614  // The user must be a PHI node.
1615  PHINode *PN = cast<PHINode>(User);
1616 
1617  // Surprisingly, instructions can be used outside of loops without any
1618  // exits. This can only happen in PHI nodes if the incoming block is
1619  // unreachable.
1620  BasicBlock *BB = PN->getIncomingBlock(U);
1621  if (!DT->isReachableFromEntry(BB)) {
1622  U = UndefValue::get(I.getType());
1623  Changed = true;
1624  continue;
1625  }
1626 
1627  VisitedUsers.insert(PN);
1628  if (isTriviallyReplaceablePHI(*PN, I))
1629  continue;
1630 
1631  if (!canSplitPredecessors(PN, SafetyInfo))
1632  return Changed;
1633 
1634  // Split predecessors of the PHI so that we can make users trivially
1635  // replaceable.
1636  splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, &MSSAU);
1637 
1638  // Should rebuild the iterators, as they may be invalidated by
1639  // splitPredecessorsOfLoopExit().
1640  UI = I.user_begin();
1641  UE = I.user_end();
1642  }
1643 
1644  if (VisitedUsers.empty())
1645  return Changed;
1646 
1647  ORE->emit([&]() {
1648  return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1649  << "sinking " << ore::NV("Inst", &I);
1650  });
1651  if (isa<LoadInst>(I))
1652  ++NumMovedLoads;
1653  else if (isa<CallInst>(I))
1654  ++NumMovedCalls;
1655  ++NumSunk;
1656 
1657 #ifndef NDEBUG
1658  SmallVector<BasicBlock *, 32> ExitBlocks;
1659  CurLoop->getUniqueExitBlocks(ExitBlocks);
1660  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1661  ExitBlocks.end());
1662 #endif
1663 
1664  // Clones of this instruction. Don't create more than one per exit block!
1666 
1667  // If this instruction is only used outside of the loop, then all users are
1668  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1669  // the instruction.
1670  // First check if I is worth sinking for all uses. Sink only when it is worth
1671  // across all uses.
1672  SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
1673  for (auto *UI : Users) {
1674  auto *User = cast<Instruction>(UI);
1675 
1676  if (CurLoop->contains(User))
1677  continue;
1678 
1679  PHINode *PN = cast<PHINode>(User);
1680  assert(ExitBlockSet.count(PN->getParent()) &&
1681  "The LCSSA PHI is not in an exit block!");
1682 
1683  // The PHI must be trivially replaceable.
1685  PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1686  PN->replaceAllUsesWith(New);
1687  eraseInstruction(*PN, *SafetyInfo, MSSAU);
1688  Changed = true;
1689  }
1690  return Changed;
1691 }
1692 
1693 /// When an instruction is found to only use loop invariant operands that
1694 /// is safe to hoist, this instruction is called to do the dirty work.
1695 ///
1696 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1697  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1698  MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
1700  LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
1701  << I << "\n");
1702  ORE->emit([&]() {
1703  return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1704  << ore::NV("Inst", &I);
1705  });
1706 
1707  // Metadata can be dependent on conditions we are hoisting above.
1708  // Conservatively strip all metadata on the instruction unless we were
1709  // guaranteed to execute I if we entered the loop, in which case the metadata
1710  // is valid in the loop preheader.
1711  // Similarly, If I is a call and it is not guaranteed to execute in the loop,
1712  // then moving to the preheader means we should strip attributes on the call
1713  // that can cause UB since we may be hoisting above conditions that allowed
1714  // inferring those attributes. They may not be valid at the preheader.
1715  if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(I)) &&
1716  // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1717  // time in isGuaranteedToExecute if we don't actually have anything to
1718  // drop. It is a compile time optimization, not required for correctness.
1719  !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1720  I.dropUndefImplyingAttrsAndUnknownMetadata();
1721 
1722  if (isa<PHINode>(I))
1723  // Move the new node to the end of the phi list in the destination block.
1724  moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE);
1725  else
1726  // Move the new node to the destination block, before its terminator.
1727  moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
1728 
1729  I.updateLocationAfterHoist();
1730 
1731  if (isa<LoadInst>(I))
1732  ++NumMovedLoads;
1733  else if (isa<CallInst>(I))
1734  ++NumMovedCalls;
1735  ++NumHoisted;
1736 }
1737 
1738 /// Only sink or hoist an instruction if it is not a trapping instruction,
1739 /// or if the instruction is known not to trap when moved to the preheader.
1740 /// or if it is a trapping instruction and is guaranteed to execute.
1742  Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
1743  const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
1744  OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
1745  bool AllowSpeculation) {
1746  if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
1747  return true;
1748 
1749  bool GuaranteedToExecute =
1750  SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1751 
1752  if (!GuaranteedToExecute) {
1753  auto *LI = dyn_cast<LoadInst>(&Inst);
1754  if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1755  ORE->emit([&]() {
1756  return OptimizationRemarkMissed(
1757  DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1758  << "failed to hoist load with loop-invariant address "
1759  "because load is conditionally executed";
1760  });
1761  }
1762 
1763  return GuaranteedToExecute;
1764 }
1765 
1766 namespace {
1767 class LoopPromoter : public LoadAndStorePromoter {
1768  Value *SomePtr; // Designated pointer to store to.
1769  const SmallSetVector<Value *, 8> &PointerMustAliases;
1770  SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1771  SmallVectorImpl<Instruction *> &LoopInsertPts;
1772  SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1773  PredIteratorCache &PredCache;
1774  MemorySSAUpdater &MSSAU;
1775  LoopInfo &LI;
1776  DebugLoc DL;
1777  Align Alignment;
1778  bool UnorderedAtomic;
1779  AAMDNodes AATags;
1780  ICFLoopSafetyInfo &SafetyInfo;
1781  bool CanInsertStoresInExitBlocks;
1782 
1783  // We're about to add a use of V in a loop exit block. Insert an LCSSA phi
1784  // (if legal) if doing so would add an out-of-loop use to an instruction
1785  // defined in-loop.
1786  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1788  return V;
1789 
1790  Instruction *I = cast<Instruction>(V);
1791  // We need to create an LCSSA PHI node for the incoming value and
1792  // store that.
1793  PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1794  I->getName() + ".lcssa", &BB->front());
1795  for (BasicBlock *Pred : PredCache.get(BB))
1796  PN->addIncoming(I, Pred);
1797  return PN;
1798  }
1799 
1800 public:
1801  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1802  const SmallSetVector<Value *, 8> &PMA,
1806  MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
1807  Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1808  ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
1809  : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
1810  LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
1811  PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
1812  Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1813  SafetyInfo(SafetyInfo),
1814  CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {}
1815 
1816  bool isInstInList(Instruction *I,
1817  const SmallVectorImpl<Instruction *> &) const override {
1818  Value *Ptr;
1819  if (LoadInst *LI = dyn_cast<LoadInst>(I))
1820  Ptr = LI->getOperand(0);
1821  else
1822  Ptr = cast<StoreInst>(I)->getPointerOperand();
1823  return PointerMustAliases.count(Ptr);
1824  }
1825 
1826  void insertStoresInLoopExitBlocks() {
1827  // Insert stores after in the loop exit blocks. Each exit block gets a
1828  // store of the live-out values that feed them. Since we've already told
1829  // the SSA updater about the defs in the loop and the preheader
1830  // definition, it is all set and we can start using it.
1831  for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1832  BasicBlock *ExitBlock = LoopExitBlocks[i];
1833  Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1834  LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1835  Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1836  Instruction *InsertPos = LoopInsertPts[i];
1837  StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1838  if (UnorderedAtomic)
1840  NewSI->setAlignment(Alignment);
1841  NewSI->setDebugLoc(DL);
1842  if (AATags)
1843  NewSI->setAAMetadata(AATags);
1844 
1845  MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1846  MemoryAccess *NewMemAcc;
1847  if (!MSSAInsertPoint) {
1848  NewMemAcc = MSSAU.createMemoryAccessInBB(
1849  NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1850  } else {
1851  NewMemAcc =
1852  MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1853  }
1854  MSSAInsertPts[i] = NewMemAcc;
1855  MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
1856  // FIXME: true for safety, false may still be correct.
1857  }
1858  }
1859 
1860  void doExtraRewritesBeforeFinalDeletion() override {
1861  if (CanInsertStoresInExitBlocks)
1862  insertStoresInLoopExitBlocks();
1863  }
1864 
1865  void instructionDeleted(Instruction *I) const override {
1866  SafetyInfo.removeInstruction(I);
1867  MSSAU.removeMemoryAccess(I);
1868  }
1869 
1870  bool shouldDelete(Instruction *I) const override {
1871  if (isa<StoreInst>(I))
1872  return CanInsertStoresInExitBlocks;
1873  return true;
1874  }
1875 };
1876 
1877 bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
1878  DominatorTree *DT) {
1879  // We can perform the captured-before check against any instruction in the
1880  // loop header, as the loop header is reachable from any instruction inside
1881  // the loop.
1882  // TODO: ReturnCaptures=true shouldn't be necessary here.
1883  return !PointerMayBeCapturedBefore(V, /* ReturnCaptures */ true,
1884  /* StoreCaptures */ true,
1885  L->getHeader()->getTerminator(), DT);
1886 }
1887 
1888 /// Return true if we can prove that a caller cannot inspect the object if an
1889 /// unwind occurs inside the loop.
1890 bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
1891  DominatorTree *DT) {
1892  bool RequiresNoCaptureBeforeUnwind;
1893  if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
1894  return false;
1895 
1896  return !RequiresNoCaptureBeforeUnwind ||
1897  isNotCapturedBeforeOrInLoop(Object, L, DT);
1898 }
1899 
1900 } // namespace
1901 
1902 /// Try to promote memory values to scalars by sinking stores out of the
1903 /// loop and moving loads to before the loop. We do this by looping over
1904 /// the stores in the loop, looking for stores to Must pointers which are
1905 /// loop invariant.
1906 ///
1908  const SmallSetVector<Value *, 8> &PointerMustAliases,
1909  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1910  SmallVectorImpl<Instruction *> &InsertPts,
1912  LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1913  Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
1914  OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
1915  // Verify inputs.
1916  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1917  SafetyInfo != nullptr &&
1918  "Unexpected Input to promoteLoopAccessesToScalars");
1919 
1920  Value *SomePtr = *PointerMustAliases.begin();
1921  BasicBlock *Preheader = CurLoop->getLoopPreheader();
1922 
1923  // It is not safe to promote a load/store from the loop if the load/store is
1924  // conditional. For example, turning:
1925  //
1926  // for () { if (c) *P += 1; }
1927  //
1928  // into:
1929  //
1930  // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
1931  //
1932  // is not safe, because *P may only be valid to access if 'c' is true.
1933  //
1934  // The safety property divides into two parts:
1935  // p1) The memory may not be dereferenceable on entry to the loop. In this
1936  // case, we can't insert the required load in the preheader.
1937  // p2) The memory model does not allow us to insert a store along any dynamic
1938  // path which did not originally have one.
1939  //
1940  // If at least one store is guaranteed to execute, both properties are
1941  // satisfied, and promotion is legal.
1942  //
1943  // This, however, is not a necessary condition. Even if no store/load is
1944  // guaranteed to execute, we can still establish these properties.
1945  // We can establish (p1) by proving that hoisting the load into the preheader
1946  // is safe (i.e. proving dereferenceability on all paths through the loop). We
1947  // can use any access within the alias set to prove dereferenceability,
1948  // since they're all must alias.
1949  //
1950  // There are two ways establish (p2):
1951  // a) Prove the location is thread-local. In this case the memory model
1952  // requirement does not apply, and stores are safe to insert.
1953  // b) Prove a store dominates every exit block. In this case, if an exit
1954  // blocks is reached, the original dynamic path would have taken us through
1955  // the store, so inserting a store into the exit block is safe. Note that this
1956  // is different from the store being guaranteed to execute. For instance,
1957  // if an exception is thrown on the first iteration of the loop, the original
1958  // store is never executed, but the exit blocks are not executed either.
1959 
1960  bool DereferenceableInPH = false;
1961  bool SafeToInsertStore = false;
1962  bool FoundLoadToPromote = false;
1963 
1965 
1966  // We start with an alignment of one and try to find instructions that allow
1967  // us to prove better alignment.
1968  Align Alignment;
1969  // Keep track of which types of access we see
1970  bool SawUnorderedAtomic = false;
1971  bool SawNotAtomic = false;
1972  AAMDNodes AATags;
1973 
1974  const DataLayout &MDL = Preheader->getModule()->getDataLayout();
1975 
1976  bool IsKnownThreadLocalObject = false;
1977  if (SafetyInfo->anyBlockMayThrow()) {
1978  // If a loop can throw, we have to insert a store along each unwind edge.
1979  // That said, we can't actually make the unwind edge explicit. Therefore,
1980  // we have to prove that the store is dead along the unwind edge. We do
1981  // this by proving that the caller can't have a reference to the object
1982  // after return and thus can't possibly load from the object.
1983  Value *Object = getUnderlyingObject(SomePtr);
1984  if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
1985  return false;
1986  // Subtlety: Alloca's aren't visible to callers, but *are* potentially
1987  // visible to other threads if captured and used during their lifetimes.
1988  IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
1989  }
1990 
1991  // Check that all accesses to pointers in the aliass set use the same type.
1992  // We cannot (yet) promote a memory location that is loaded and stored in
1993  // different sizes. While we are at it, collect alignment and AA info.
1994  Type *AccessTy = nullptr;
1995  for (Value *ASIV : PointerMustAliases) {
1996  for (Use &U : ASIV->uses()) {
1997  // Ignore instructions that are outside the loop.
1998  Instruction *UI = dyn_cast<Instruction>(U.getUser());
1999  if (!UI || !CurLoop->contains(UI))
2000  continue;
2001 
2002  // If there is an non-load/store instruction in the loop, we can't promote
2003  // it.
2004  if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
2005  if (!Load->isUnordered())
2006  return false;
2007 
2008  SawUnorderedAtomic |= Load->isAtomic();
2009  SawNotAtomic |= !Load->isAtomic();
2010  FoundLoadToPromote = true;
2011 
2012  Align InstAlignment = Load->getAlign();
2013 
2014  // Note that proving a load safe to speculate requires proving
2015  // sufficient alignment at the target location. Proving it guaranteed
2016  // to execute does as well. Thus we can increase our guaranteed
2017  // alignment as well.
2018  if (!DereferenceableInPH || (InstAlignment > Alignment))
2020  *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
2021  Preheader->getTerminator(), AllowSpeculation)) {
2022  DereferenceableInPH = true;
2023  Alignment = std::max(Alignment, InstAlignment);
2024  }
2025  } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
2026  // Stores *of* the pointer are not interesting, only stores *to* the
2027  // pointer.
2028  if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
2029  continue;
2030  if (!Store->isUnordered())
2031  return false;
2032 
2033  SawUnorderedAtomic |= Store->isAtomic();
2034  SawNotAtomic |= !Store->isAtomic();
2035 
2036  // If the store is guaranteed to execute, both properties are satisfied.
2037  // We may want to check if a store is guaranteed to execute even if we
2038  // already know that promotion is safe, since it may have higher
2039  // alignment than any other guaranteed stores, in which case we can
2040  // raise the alignment on the promoted store.
2041  Align InstAlignment = Store->getAlign();
2042 
2043  if (!DereferenceableInPH || !SafeToInsertStore ||
2044  (InstAlignment > Alignment)) {
2045  if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
2046  DereferenceableInPH = true;
2047  SafeToInsertStore = true;
2048  Alignment = std::max(Alignment, InstAlignment);
2049  }
2050  }
2051 
2052  // If a store dominates all exit blocks, it is safe to sink.
2053  // As explained above, if an exit block was executed, a dominating
2054  // store must have been executed at least once, so we are not
2055  // introducing stores on paths that did not have them.
2056  // Note that this only looks at explicit exit blocks. If we ever
2057  // start sinking stores into unwind edges (see above), this will break.
2058  if (!SafeToInsertStore)
2059  SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2060  return DT->dominates(Store->getParent(), Exit);
2061  });
2062 
2063  // If the store is not guaranteed to execute, we may still get
2064  // deref info through it.
2065  if (!DereferenceableInPH) {
2066  DereferenceableInPH = isDereferenceableAndAlignedPointer(
2067  Store->getPointerOperand(), Store->getValueOperand()->getType(),
2068  Store->getAlign(), MDL, Preheader->getTerminator(), DT, TLI);
2069  }
2070  } else
2071  return false; // Not a load or store.
2072 
2073  if (!AccessTy)
2074  AccessTy = getLoadStoreType(UI);
2075  else if (AccessTy != getLoadStoreType(UI))
2076  return false;
2077 
2078  // Merge the AA tags.
2079  if (LoopUses.empty()) {
2080  // On the first load/store, just take its AA tags.
2081  AATags = UI->getAAMetadata();
2082  } else if (AATags) {
2083  AATags = AATags.merge(UI->getAAMetadata());
2084  }
2085 
2086  LoopUses.push_back(UI);
2087  }
2088  }
2089 
2090  // If we found both an unordered atomic instruction and a non-atomic memory
2091  // access, bail. We can't blindly promote non-atomic to atomic since we
2092  // might not be able to lower the result. We can't downgrade since that
2093  // would violate memory model. Also, align 0 is an error for atomics.
2094  if (SawUnorderedAtomic && SawNotAtomic)
2095  return false;
2096 
2097  // If we're inserting an atomic load in the preheader, we must be able to
2098  // lower it. We're only guaranteed to be able to lower naturally aligned
2099  // atomics.
2100  if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy))
2101  return false;
2102 
2103  // If we couldn't prove we can hoist the load, bail.
2104  if (!DereferenceableInPH)
2105  return false;
2106 
2107  // We know we can hoist the load, but don't have a guaranteed store.
2108  // Check whether the location is thread-local. If it is, then we can insert
2109  // stores along paths which originally didn't have them without violating the
2110  // memory model.
2111  if (!SafeToInsertStore) {
2112  if (IsKnownThreadLocalObject)
2113  SafeToInsertStore = true;
2114  else {
2115  Value *Object = getUnderlyingObject(SomePtr);
2116  SafeToInsertStore =
2117  (isNoAliasCall(Object) || isa<AllocaInst>(Object)) &&
2118  isNotCapturedBeforeOrInLoop(Object, CurLoop, DT);
2119  }
2120  }
2121 
2122  // If we've still failed to prove we can sink the store, hoist the load
2123  // only, if possible.
2124  if (!SafeToInsertStore && !FoundLoadToPromote)
2125  // If we cannot hoist the load either, give up.
2126  return false;
2127 
2128  // Lets do the promotion!
2129  if (SafeToInsertStore)
2130  LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
2131  << '\n');
2132  else
2133  LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
2134  << '\n');
2135 
2136  ORE->emit([&]() {
2137  return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2138  LoopUses[0])
2139  << "Moving accesses to memory location out of the loop";
2140  });
2141  ++NumPromoted;
2142 
2143  // Look at all the loop uses, and try to merge their locations.
2144  std::vector<const DILocation *> LoopUsesLocs;
2145  for (auto U : LoopUses)
2146  LoopUsesLocs.push_back(U->getDebugLoc().get());
2147  auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs));
2148 
2149  // We use the SSAUpdater interface to insert phi nodes as required.
2151  SSAUpdater SSA(&NewPHIs);
2152  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
2153  InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
2154  Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
2155  SafeToInsertStore);
2156 
2157  // Set up the preheader to have a definition of the value. It is the live-out
2158  // value from the preheader that uses in the loop will use.
2159  LoadInst *PreheaderLoad = new LoadInst(
2160  AccessTy, SomePtr, SomePtr->getName() + ".promoted",
2161  Preheader->getTerminator());
2162  if (SawUnorderedAtomic)
2163  PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2164  PreheaderLoad->setAlignment(Alignment);
2165  PreheaderLoad->setDebugLoc(DebugLoc());
2166  if (AATags)
2167  PreheaderLoad->setAAMetadata(AATags);
2168  SSA.AddAvailableValue(Preheader, PreheaderLoad);
2169 
2170  MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
2171  PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2172  MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2173  MSSAU.insertUse(NewMemUse, /*RenameUses=*/true);
2174 
2175  if (VerifyMemorySSA)
2176  MSSAU.getMemorySSA()->verifyMemorySSA();
2177  // Rewrite all the loads in the loop and remember all the definitions from
2178  // stores in the loop.
2179  Promoter.run(LoopUses);
2180 
2181  if (VerifyMemorySSA)
2182  MSSAU.getMemorySSA()->verifyMemorySSA();
2183  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2184  if (PreheaderLoad->use_empty())
2185  eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
2186 
2187  return true;
2188 }
2189 
2190 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
2191  function_ref<void(Instruction *)> Fn) {
2192  for (const BasicBlock *BB : L->blocks())
2193  if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2194  for (const auto &Access : *Accesses)
2195  if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
2196  Fn(MUD->getMemoryInst());
2197 }
2198 
2201  AliasSetTracker AST(*AA);
2202 
2203  auto IsPotentiallyPromotable = [L](const Instruction *I) {
2204  if (const auto *SI = dyn_cast<StoreInst>(I))
2205  return L->isLoopInvariant(SI->getPointerOperand());
2206  if (const auto *LI = dyn_cast<LoadInst>(I))
2207  return L->isLoopInvariant(LI->getPointerOperand());
2208  return false;
2209  };
2210 
2211  // Populate AST with potentially promotable accesses.
2212  SmallPtrSet<Value *, 16> AttemptingPromotion;
2213  foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2214  if (IsPotentiallyPromotable(I)) {
2215  AttemptingPromotion.insert(I);
2216  AST.add(I);
2217  }
2218  });
2219 
2220  // We're only interested in must-alias sets that contain a mod.
2222  for (AliasSet &AS : AST)
2223  if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2224  Sets.push_back(&AS);
2225 
2226  if (Sets.empty())
2227  return {}; // Nothing to promote...
2228 
2229  // Discard any sets for which there is an aliasing non-promotable access.
2230  foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2231  if (AttemptingPromotion.contains(I))
2232  return;
2233 
2234  llvm::erase_if(Sets, [&](const AliasSet *AS) {
2235  return AS->aliasesUnknownInst(I, *AA);
2236  });
2237  });
2238 
2240  for (const AliasSet *Set : Sets) {
2241  SmallSetVector<Value *, 8> PointerMustAliases;
2242  for (const auto &ASI : *Set)
2243  PointerMustAliases.insert(ASI.getValue());
2244  Result.push_back(std::move(PointerMustAliases));
2245  }
2246 
2247  return Result;
2248 }
2249 
2251  Loop *CurLoop, Instruction &I,
2252  SinkAndHoistLICMFlags &Flags) {
2253  // For hoisting, use the walker to determine safety
2254  if (!Flags.getIsSink()) {
2256  // See declaration of SetLicmMssaOptCap for usage details.
2257  if (Flags.tooManyClobberingCalls())
2258  Source = MU->getDefiningAccess();
2259  else {
2261  Flags.incrementClobberingCalls();
2262  }
2263  return !MSSA->isLiveOnEntryDef(Source) &&
2264  CurLoop->contains(Source->getBlock());
2265  }
2266 
2267  // For sinking, we'd need to check all Defs below this use. The getClobbering
2268  // call will look on the backedge of the loop, but will check aliasing with
2269  // the instructions on the previous iteration.
2270  // For example:
2271  // for (i ... )
2272  // load a[i] ( Use (LoE)
2273  // store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2274  // i++;
2275  // The load sees no clobbering inside the loop, as the backedge alias check
2276  // does phi translation, and will check aliasing against store a[i-1].
2277  // However sinking the load outside the loop, below the store is incorrect.
2278 
2279  // For now, only sink if there are no Defs in the loop, and the existing ones
2280  // precede the use and are in the same block.
2281  // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2282  // needs PostDominatorTreeAnalysis.
2283  // FIXME: More precise: no Defs that alias this Use.
2284  if (Flags.tooManyMemoryAccesses())
2285  return true;
2286  for (auto *BB : CurLoop->getBlocks())
2287  if (pointerInvalidatedByBlock(*BB, *MSSA, *MU))
2288  return true;
2289  // When sinking, the source block may not be part of the loop so check it.
2290  if (!CurLoop->contains(&I))
2291  return pointerInvalidatedByBlock(*I.getParent(), *MSSA, *MU);
2292 
2293  return false;
2294 }
2295 
2297  if (const auto *Accesses = MSSA.getBlockDefs(&BB))
2298  for (const auto &MA : *Accesses)
2299  if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2300  if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
2301  return true;
2302  return false;
2303 }
2304 
2305 /// Little predicate that returns true if the specified basic block is in
2306 /// a subloop of the current one, not the current one itself.
2307 ///
2308 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2309  assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2310  return LI->getLoopFor(BB) != CurLoop;
2311 }
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::LoopSafetyInfo::isGuaranteedToExecute
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const =0
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:735
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
hoist
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
When an instruction is found to only use loop invariant operands that is safe to hoist,...
Definition: LICM.cpp:1696
llvm::MemorySSA::verifyMemorySSA
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1906
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:65
llvm::initializeLegacyLICMPassPass
void initializeLegacyLICMPassPass(PassRegistry &)
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
llvm::PHINode::incoming_values
op_range incoming_values()
Definition: Instructions.h:2750
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
Metadata.h
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4576
llvm::isModOrRefSet
LLVM_NODISCARD bool isModOrRefSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:189
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::hoistRegion
bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, Loop *, MemorySSAUpdater &, ScalarEvolution *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool, bool AllowSpeculation)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block,...
Definition: LICM.cpp:847
DebugInfoMetadata.h
llvm::LoadAndStorePromoter
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater.
Definition: SSAUpdater.h:136
Scalar.h
SetOperations.h
llvm::PassInfoMixin< LICMPass >
llvm::succ_end
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
llvm::ScalarEvolution::forgetLoopDispositions
void forgetLoopDispositions(const Loop *L)
Called when the client has changed the disposition of values in this loop.
Definition: ScalarEvolution.cpp:8126
llvm::TypeSize::getFixedSize
ScalarTy getFixedSize() const
Definition: TypeSize.h:430
Loads.h
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::PredIteratorCache
PredIteratorCache - This class is an extremely trivial cache for predecessor iterator queries.
Definition: PredIteratorCache.h:27
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
isDereferenceableAndAlignedPointer
static bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT, const TargetLibraryInfo *TLI, SmallPtrSetImpl< const Value * > &Visited, unsigned MaxDepth)
Test if V is always a pointer to allocated and suitably aligned memory for a simple load or store.
Definition: Loads.cpp:39
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::insert
bool insert(const T &X)
Insert a new element into the PriorityWorklist.
Definition: PriorityWorklist.h:90
llvm::SetLicmMssaNoAccForPromotionCap
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:689
Statistic.h
CaptureTracking.h
llvm::DataLayout::getTypeStoreSize
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:474
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
LazyBlockFrequencyInfo.h
llvm::BasicBlock::replaceSuccessorsPhiUsesWith
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:454
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1797
isLoadInvariantInLoop
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, Loop *CurLoop)
Definition: LICM.cpp:1029
llvm::SmallDenseMap
Definition: DenseMap.h:882
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::MemorySSA::dominates
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
Definition: MemorySSA.cpp:2159
eraseInstruction
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition: LICM.cpp:1461
llvm::SinkAndHoistLICMFlags::tooManyClobberingCalls
bool tooManyClobberingCalls()
Definition: LoopUtils.h:128
ValueTracking.h
Local.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:144
llvm::AliasSetTracker
Definition: AliasSetTracker.h:326
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::getLoadStoreType
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
Definition: Instructions.h:5375
llvm::AAMDNodes
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:652
llvm::LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
Definition: LazyBlockFrequencyInfo.cpp:62
ScalarEvolution.h
MaxNumUsesTraversed
static cl::opt< uint32_t > MaxNumUsesTraversed("licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)"))
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1271
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::BasicBlock::getSingleSuccessor
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:291
llvm::StoreInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:358
PIC
PassInstrumentationCallbacks PIC
Definition: PassBuilderBindings.cpp:55
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
llvm::hasDisableLICMTransformsHint
bool hasDisableLICMTransformsHint(const Loop *L)
Look for the loop attribute that disables the LICM transformation heuristics.
Definition: LoopUtils.cpp:351
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LICM.cpp:93
sink
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition: LICM.cpp:1590
llvm::TinyPtrVector::front
EltTy front() const
Definition: TinyPtrVector.h:230
ControlFlowHoisting
static cl::opt< bool > ControlFlowHoisting("licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM"))
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::Value::hasOneUser
bool hasOneUser() const
Return true if there is exactly one user of this value.
Definition: Value.cpp:153
llvm::formLCSSARecursively
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:404
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:147
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::SinkAndHoistLICMFlags::LicmMssaNoAccForPromotionCap
unsigned LicmMssaNoAccForPromotionCap
Definition: LoopUtils.h:135
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
llvm::MemoryUse
Represents read-only accesses to memory.
Definition: MemorySSA.h:326
llvm::SinkAndHoistLICMFlags::incrementClobberingCalls
void incrementClobberingCalls()
Definition: LoopUtils.h:129
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
llvm::canSinkOrHoistInst
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, MemorySSAUpdater &MSSAU, bool TargetExecutesOncePerLoop, SinkAndHoistLICMFlags &LICMFlags, OptimizationRemarkEmitter *ORE=nullptr)
Returns true if is legal to hoist or sink this instruction disregarding the possible introduction of ...
Definition: LICM.cpp:1137
ConstantFolding.h
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:298
MustExecute.h
llvm::MemorySSAUpdater::createMemoryAccessInBB
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
Definition: MemorySSAUpdater.cpp:1429
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::sinkRegionForLoopNest
bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *)
Call sinkRegion on loops contained within the specified loop in order from innermost to outermost.
Definition: LICM.cpp:596
F
#define F(x, y, z)
Definition: MD5.cpp:55
cloneInstructionInExitBlock
static Instruction * cloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU)
Definition: LICM.cpp:1389
llvm::LazyBlockFrequencyInfoPass
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
Definition: LazyBlockFrequencyInfo.h:98
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
AliasAnalysis.h
licm
licm
Definition: LICM.cpp:342
llvm::LoopBlocksRPO
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
llvm::isGuard
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
PriorityWorklist.h
CommandLine.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::remove
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::MemorySSAUpdater::insertUse
void insertUse(MemoryUse *Use, bool RenameUses=false)
Definition: MemorySSAUpdater.cpp:238
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1607
llvm::StoreInst::setOrdering
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:369
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::isNoAliasCall
bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
Definition: AliasAnalysis.cpp:962
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:998
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::begin
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::LICMOptions::AllowSpeculation
bool AllowSpeculation
Definition: LICM.h:51
Constants.h
collectPromotionCandidates
static SmallVector< SmallSetVector< Value *, 8 >, 0 > collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L)
Definition: LICM.cpp:2200
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
inSubLoop
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI)
Little predicate that returns true if the specified basic block is in a subloop of the current one,...
Definition: LICM.cpp:2308
llvm::AAResults
Definition: AliasAnalysis.h:511
llvm::LoopSafetyInfo
Captures loop safety information.
Definition: MustExecute.h:60
llvm::MemorySSA::isLiveOnEntryDef
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:751
foreachMemoryAccess
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L, function_ref< void(Instruction *)> Fn)
Definition: LICM.cpp:2190
llvm::PredIteratorCache::size
size_t size(BasicBlock *BB) const
Definition: PredIteratorCache.h:65
llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags
SinkAndHoistLICMFlags(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink, Loop *L=nullptr, MemorySSA *MSSA=nullptr)
Definition: LICM.cpp:358
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::SinkAndHoistLICMFlags
Flags controlling how much is checked when sinking or hoisting instructions.
Definition: LoopUtils.h:115
llvm::promoteLoopAccessesToScalars
bool promoteLoopAccessesToScalars(const SmallSetVector< Value *, 8 > &, SmallVectorImpl< BasicBlock * > &, SmallVectorImpl< Instruction * > &, SmallVectorImpl< MemoryAccess * > &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *, bool AllowSpeculation)
Try to promote memory values to scalars by sinking stores out of the loop and moving loads to before ...
Definition: LICM.cpp:1907
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: PriorityWorklist.h:153
llvm::AliasSet
Definition: AliasSetTracker.h:46
llvm::LICMOptions::MssaOptCap
unsigned MssaOptCap
Definition: LICM.h:49
llvm::ReplaceInstWithInst
void ReplaceInstWithInst(BasicBlock::InstListType &BIL, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
Definition: BasicBlockUtils.cpp:477
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::LoopSafetyInfo::getBlockColors
const DenseMap< BasicBlock *, ColorVector > & getBlockColors() const
Returns block colors map that is used to update funclet operand bundles.
Definition: MustExecute.cpp:32
TargetLibraryInfo.h
AssumeBundleBuilder.h
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:376
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
false
Definition: StackSlotColoring.cpp:141
llvm::MemorySSAUpdater::moveToPlace
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
Definition: MemorySSAUpdater.cpp:1200
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::Instruction
Definition: Instruction.h:42
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::MemorySSA::getBlockAccesses
const AccessList * getBlockAccesses(const BasicBlock *BB) const
Return the list of MemoryAccess's for a given basic block.
Definition: MemorySSA.h:771
LoopUtils.h
llvm::getUnderlyingObject
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
Definition: ValueTracking.cpp:4343
llvm::LPPassManager
Definition: LoopPass.h:76
llvm::Instruction::getAAMetadata
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1402
llvm::SplitBlockPredecessors
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
Definition: BasicBlockUtils.cpp:1172
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:147
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
PatternMatch.h
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:209
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2756
llvm::MemoryAccess::getBlock
BasicBlock * getBlock() const
Definition: MemorySSA.h:164
LoopIterator.h
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MemorySSA::locallyDominates
bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in the same basic block, determine whether MemoryAccess A dominates MemoryA...
Definition: MemorySSA.cpp:2128
llvm::OperandBundleUse::getTagID
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
Definition: InstrTypes.h:1097
CFG.h
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3180
llvm::OutputFileType::Object
@ Object
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
isTriviallyReplaceablePHI
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I)
Returns true if a PHINode is a trivially replaceable with an Instruction.
Definition: LICM.cpp:1308
llvm::MemorySSAWalker::getClobberingMemoryAccess
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1058
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:137
llvm::SPIRV::Decoration::Alignment
@ Alignment
llvm::cl::opt< bool >
SSA
Memory SSA
Definition: MemorySSA.cpp:73
llvm::LoadInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:274
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:305
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::AliasSetTracker::add
void add(Value *Ptr, LocationSize Size, const AAMDNodes &AAInfo)
These methods are used to add different types of instructions to the alias sets.
Definition: AliasSetTracker.cpp:393
llvm::Value::getNameOrAsOperand
std::string getNameOrAsOperand() const
Definition: Value.cpp:440
LICM
loop versioning Loop Versioning For LICM
Definition: LoopVersioningLICM.cpp:656
llvm::LoadAndStorePromoter::shouldDelete
virtual bool shouldDelete(Instruction *I) const
Return false if a sub-class wants to keep one of the loads/stores after the SSA construction.
Definition: SSAUpdater.h:175
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:468
uint64_t
llvm::LoopPass
Definition: LoopPass.h:28
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2514
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:54
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
llvm::MemorySSAUpdater::getMemorySSA
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Definition: MemorySSAUpdater.h:242
llvm::LoadInst::setOrdering
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:238
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:78
llvm::LoopBase::getUniqueExitBlocks
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:121
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:88
llvm::PHINode::setIncomingBlock
void setIncomingBlock(unsigned i, BasicBlock *BB)
Definition: Instructions.h:2799
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::SmallPriorityWorklist
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
Definition: PriorityWorklist.h:255
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2814
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:716
llvm::ICFLoopSafetyInfo::anyBlockMayThrow
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:74
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:714
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:970
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:731
PredIteratorCache.h
llvm::succ_begin
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:929
llvm::LoadInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:228
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:608
llvm::PHINode::getBasicBlockIndex
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
Definition: Instructions.h:2842
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:403
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:364
llvm::ScalarEvolution::forgetValue
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
Definition: ScalarEvolution.cpp:8099
llvm::ICFLoopSafetyInfo::doesNotWriteMemoryBefore
bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop) const
Returns true if we could not execute a memory-modifying instruction before we enter BB under assumpti...
Definition: MustExecute.cpp:274
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::empty
bool empty() const
Determine if the PriorityWorklist is empty or not.
Definition: PriorityWorklist.h:67
splitPredecessorsOfLoopExit
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, LoopInfo *LI, const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1518
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
llvm::ModRefInfo
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition: AliasAnalysis.h:148
llvm::LoopBase::hasDedicatedExits
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
Definition: LoopInfoImpl.h:91
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::LNICMPass::run
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LICM.cpp:294
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LNICMPass::printPipeline
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: LICM.cpp:324
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:335
isFreeInLoop
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI)
Return true if the instruction is free in the loop.
Definition: LICM.cpp:1317
LoopNestAnalysis.h
isSafeToExecuteUnconditionally
static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI, bool AllowSpeculation)
Only sink or hoist an instruction if it is not a trapping instruction, or if the instruction is known...
Definition: LICM.cpp:1741
llvm::salvageKnowledge
void salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
Definition: AssumeBundleBuilder.cpp:293
pointerInvalidatedByBlock
static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU)
Definition: LICM.cpp:2296
llvm::FMRB_DoesNotAccessMemory
@ FMRB_DoesNotAccessMemory
This function does not perform any non-local loads or stores to memory.
Definition: AliasAnalysis.h:268
llvm::LoopNest::getOutermostLoop
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Definition: LoopNestAnalysis.h:80
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:948
llvm::collectChildrenInLoop
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:453
hasProfileData
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
Definition: PartialInlining.cpp:711
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::SinkAndHoistLICMFlags::NoOfMemAccTooLarge
bool NoOfMemAccTooLarge
Definition: LoopUtils.h:132
llvm::MemorySSAUpdater::createMemoryAccessAfter
MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
Definition: MemorySSAUpdater.cpp:1447
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:395
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::AAMDNodes::merge
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
Definition: TypeBasedAliasAnalysis.cpp:523
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::MemorySSAUpdater::insertDef
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
Definition: MemorySSAUpdater.cpp:307
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1614
DataLayout.h
llvm::TinyPtrVector::size
unsigned size() const
Definition: TinyPtrVector.h:172
llvm::MemorySSA::getSkipSelfWalker
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1619
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
LoopPass.h
Motion
Loop Invariant Code Motion
Definition: LICM.cpp:342
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::MemorySSA::getBlockDefs
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:779
llvm::isNotVisibleOnUnwind
bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
Definition: AliasAnalysis.cpp:990
llvm::pred_size
unsigned pred_size(const BasicBlock *BB)
Get the number of predecessors of BB.
Definition: CFG.h:123
llvm::appendLoopsToWorklist
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1472
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:168
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:217
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:529
llvm::ms_demangle::IntrinsicFunctionKind::New
@ New
llvm::OperandBundleUse
A lightweight accessor for an operand bundle meant to be passed around by value.
Definition: InstrTypes.h:1069
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ICFLoopSafetyInfo::insertInstructionTo
void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB)
Inform the safety info that we are planning to insert a new instruction Inst into the basic block BB.
Definition: MustExecute.cpp:92
llvm::FunctionModRefBehavior
FunctionModRefBehavior
Summary of how a function affects memory in the program.
Definition: AliasAnalysis.h:262
llvm::DominatorTreeBase::properlyDominates
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Definition: GenericDomTree.h:392
llvm::LoopStandardAnalysisResults::BFI
BlockFrequencyInfo * BFI
Definition: LoopAnalysisManager.h:59
llvm::LoopNest::getParent
Function * getParent() const
Return the function to which the loop-nest belongs.
Definition: LoopNestAnalysis.h:176
SSAUpdater.h
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
llvm::Loop::hasLoopInvariantOperands
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:66
llvm::MemoryAccess
Definition: MemorySSA.h:142
llvm::DomTreeNodeBase< BasicBlock >
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:176
llvm::DominatorTreeBase::addNewBlock
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Definition: GenericDomTree.h:619
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:93
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1634
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:309
llvm::MemorySSA::ensureOptimizedUses
void ensureOptimizedUses()
By default, uses are not optimized during MemorySSA construction.
Definition: MemorySSA.cpp:2186
llvm::ICFLoopSafetyInfo::removeInstruction
void removeInstruction(const Instruction *Inst)
Inform safety info that we are planning to remove the instruction Inst from its block.
Definition: MustExecute.cpp:98
getPointerOperandIndex
static unsigned getPointerOperandIndex(Instruction *I)
Definition: HWAddressSanitizer.cpp:756
std
Definition: BitVector.h:851
llvm::Loop::isLCSSAForm
bool isLCSSAForm(const DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:458
llvm::Value::user_iterator
user_iterator_impl< User > user_iterator
Definition: Value.h:390
llvm::MemoryUseOrDef::getDefiningAccess
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:262
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:58
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:341
llvm::ICFLoopSafetyInfo
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:132
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2706
llvm::MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor
void wireOldPredecessorsToNewImmediatePredecessor(BasicBlock *Old, BasicBlock *New, ArrayRef< BasicBlock * > Preds, bool IdenticalEdgesWereMerged=true)
A new empty BasicBlock (New) now branches directly to Old.
Definition: MemorySSAUpdater.cpp:1268
llvm::pred_end
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:112
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:966
llvm::TypeSize
Definition: TypeSize.h:421
llvm::Instruction::setAAMetadata
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1411
llvm::LICMPass::printPipeline
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: LICM.cpp:284
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::LICMOptions::MssaNoAccForPromotionCap
unsigned MssaNoAccForPromotionCap
Definition: LICM.h:50
sinkThroughTriviallyReplaceablePHI
static Instruction * sinkThroughTriviallyReplaceablePHI(PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap< BasicBlock *, Instruction *, 32 > &SunkCopies, const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, MemorySSAUpdater &MSSAU)
Definition: LICM.cpp:1482
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::salvageDebugInfo
void salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1739
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::Instruction::isEHPad
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:662
llvm::MemoryUseOrDef
Class that has the common methods + fields of memory uses/defs.
Definition: MemorySSA.h:252
llvm::Function::hasProfileData
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:290
llvm::LoopSafetyInfo::copyColors
void copyColors(BasicBlock *New, BasicBlock *Old)
Copy colors of block Old into the block New.
Definition: MustExecute.cpp:36
Unordered
QP Compare Ordered Unordered
Definition: README_P9.txt:299
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:52
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
GuardUtils.h
DisablePromotion
static cl::opt< bool > DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), cl::desc("Disable memory promotion in LICM pass"))
Memory promotion is enabled by default.
AA
llvm::pred_begin
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:109
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:364
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::createLICMPass
Pass * createLICMPass()
Definition: LICM.cpp:345
MemorySSA.h
Instructions.h
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::LICMPass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LICM.cpp:259
llvm::ConstantInt::isNegative
bool isNegative() const
Definition: Constants.h:189
llvm::BasicBlock::moveBefore
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:136
moveInstructionBefore
static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, ScalarEvolution *SE)
Definition: LICM.cpp:1468
llvm::LoopBase::addBasicBlockToLoop
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:241
llvm::LoadAndStorePromoter::doExtraRewritesBeforeFinalDeletion
virtual void doExtraRewritesBeforeFinalDeletion()
This hook is invoked after all the stores are found and inserted as available values.
Definition: SSAUpdater.h:161
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
N
#define N
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::ICFLoopSafetyInfo::computeLoopSafetyInfo
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:78
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2780
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_END(LegacyLICMPass
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
Users
iv Induction Variable Users
Definition: IVUsers.cpp:48
TargetTransformInfo.h
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:61
llvm::PHINode
Definition: Instructions.h:2664
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
LICM.h
llvm::set_intersect
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:40
llvm::SmallVectorImpl< BasicBlock * >
llvm::PredIteratorCache::get
ArrayRef< BasicBlock * > get(BasicBlock *BB)
Definition: PredIteratorCache.h:66
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
isNotUsedOrFreeInLoop
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop, bool LoopNestMode)
Return true if the only users of this instruction are outside of the loop.
Definition: LICM.cpp:1347
DerivedTypes.h
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
pointerInvalidatedByLoop
static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU, Loop *CurLoop, Instruction &I, SinkAndHoistLICMFlags &Flags)
Definition: LICM.cpp:2250
AliasSetTracker.h
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::ICFLoopSafetyInfo::isGuaranteedToExecute
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
Definition: MustExecute.cpp:267
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
LLVMContext.h
llvm::SinkAndHoistLICMFlags::tooManyMemoryAccesses
bool tooManyMemoryAccesses()
Definition: LoopUtils.h:127
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::LoadAndStorePromoter::instructionDeleted
virtual void instructionDeleted(Instruction *I) const
Called before each instruction is deleted.
Definition: SSAUpdater.h:168
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:405
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3099
llvm::ConstantFoldInstruction
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
Definition: ConstantFolding.cpp:1095
raw_ostream.h
llvm::LoopBlocksRPO::perform
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
llvm::TinyPtrVector
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
llvm::SSAUpdater
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:38
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:389
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:644
BasicBlockUtils.h
llvm::SetLicmMssaOptCap
cl::opt< unsigned > SetLicmMssaOptCap
llvm::LazyBranchProbabilityInfoPass
This is an alternative analysis pass to BranchProbabilityInfoWrapperPass.
Definition: LazyBranchProbabilityInfo.h:50
llvm::LoopStandardAnalysisResults::TLI
TargetLibraryInfo & TLI
Definition: LoopAnalysisManager.h:57
InitializePasses.h
llvm::PointerMayBeCapturedBefore
bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, const DominatorTree *DT, bool IncludeI=false, unsigned MaxUsesToExplore=0, const LoopInfo *LI=nullptr)
PointerMayBeCapturedBefore - Return true if this pointer value may be captured by the enclosing funct...
Definition: CaptureTracking.cpp:269
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3178
llvm::sinkRegion
bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, TargetTransformInfo *, Loop *CurLoop, MemorySSAUpdater &, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, Loop *OutermostLoop=nullptr)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block,...
Definition: LICM.cpp:528
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3192
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1246
llvm::MemorySSAUpdater::removeMemoryAccess
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
Definition: MemorySSAUpdater.cpp:1305
llvm::LoopInfo::wouldBeOutOfLoopUseRequiringLCSSA
bool wouldBeOutOfLoopUseRequiringLCSSA(const Value *V, const BasicBlock *ExitBB) const
Definition: LoopInfo.cpp:932
canSplitPredecessors
static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo)
Definition: LICM.cpp:1500
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:927
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::SinkAndHoistLICMFlags::getIsSink
bool getIsSink()
Definition: LoopUtils.h:126
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::LoadAndStorePromoter::isInstInList
virtual bool isInstInList(Instruction *I, const SmallVectorImpl< Instruction * > &Insts) const
Return true if the specified instruction is in the Inst list.
Definition: SSAUpdater.cpp:478