LLVM  16.0.0git
LoopDistribute.cpp
Go to the documentation of this file.
1 //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Loop Distribution Pass. Its main focus is to
10 // distribute loops that cannot be vectorized due to dependence cycles. It
11 // tries to isolate the offending dependences into a new loop allowing
12 // vectorization of the remaining parts.
13 //
14 // For dependence analysis, the pass uses the LoopVectorizer's
15 // LoopAccessAnalysis. Because this analysis presumes no change in the order of
16 // memory operations, special care is taken to preserve the lexical order of
17 // these operations.
18 //
19 // Similarly to the Vectorizer, the pass also supports loop versioning to
20 // run-time disambiguate potentially overlapping arrays.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/Optional.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/ADT/Twine.h"
40 #include "llvm/Analysis/LoopInfo.h"
45 #include "llvm/IR/BasicBlock.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DiagnosticInfo.h"
48 #include "llvm/IR/Dominators.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/Instruction.h"
51 #include "llvm/IR/Instructions.h"
52 #include "llvm/IR/LLVMContext.h"
53 #include "llvm/IR/Metadata.h"
54 #include "llvm/IR/PassManager.h"
55 #include "llvm/IR/Value.h"
56 #include "llvm/InitializePasses.h"
57 #include "llvm/Pass.h"
58 #include "llvm/Support/Casting.h"
60 #include "llvm/Support/Debug.h"
62 #include "llvm/Transforms/Scalar.h"
68 #include <cassert>
69 #include <functional>
70 #include <list>
71 #include <tuple>
72 #include <utility>
73 
74 using namespace llvm;
75 
76 #define LDIST_NAME "loop-distribute"
77 #define DEBUG_TYPE LDIST_NAME
78 
79 /// @{
80 /// Metadata attribute names
81 static const char *const LLVMLoopDistributeFollowupAll =
82  "llvm.loop.distribute.followup_all";
83 static const char *const LLVMLoopDistributeFollowupCoincident =
84  "llvm.loop.distribute.followup_coincident";
85 static const char *const LLVMLoopDistributeFollowupSequential =
86  "llvm.loop.distribute.followup_sequential";
87 static const char *const LLVMLoopDistributeFollowupFallback =
88  "llvm.loop.distribute.followup_fallback";
89 /// @}
90 
91 static cl::opt<bool>
92  LDistVerify("loop-distribute-verify", cl::Hidden,
93  cl::desc("Turn on DominatorTree and LoopInfo verification "
94  "after Loop Distribution"),
95  cl::init(false));
96 
98  "loop-distribute-non-if-convertible", cl::Hidden,
99  cl::desc("Whether to distribute into a loop that may not be "
100  "if-convertible by the loop vectorizer"),
101  cl::init(false));
102 
104  "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
105  cl::desc("The maximum number of SCEV checks allowed for Loop "
106  "Distribution"));
107 
109  "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
110  cl::Hidden,
111  cl::desc(
112  "The maximum number of SCEV checks allowed for Loop "
113  "Distribution for loop marked with #pragma loop distribute(enable)"));
114 
116  "enable-loop-distribute", cl::Hidden,
117  cl::desc("Enable the new, experimental LoopDistribution Pass"),
118  cl::init(false));
119 
120 STATISTIC(NumLoopsDistributed, "Number of loops distributed");
121 
122 namespace {
123 
124 /// Maintains the set of instructions of the loop for a partition before
125 /// cloning. After cloning, it hosts the new loop.
126 class InstPartition {
127  using InstructionSet = SmallPtrSet<Instruction *, 8>;
128 
129 public:
130  InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
131  : DepCycle(DepCycle), OrigLoop(L) {
132  Set.insert(I);
133  }
134 
135  /// Returns whether this partition contains a dependence cycle.
136  bool hasDepCycle() const { return DepCycle; }
137 
138  /// Adds an instruction to this partition.
139  void add(Instruction *I) { Set.insert(I); }
140 
141  /// Collection accessors.
142  InstructionSet::iterator begin() { return Set.begin(); }
143  InstructionSet::iterator end() { return Set.end(); }
144  InstructionSet::const_iterator begin() const { return Set.begin(); }
145  InstructionSet::const_iterator end() const { return Set.end(); }
146  bool empty() const { return Set.empty(); }
147 
148  /// Moves this partition into \p Other. This partition becomes empty
149  /// after this.
150  void moveTo(InstPartition &Other) {
151  Other.Set.insert(Set.begin(), Set.end());
152  Set.clear();
153  Other.DepCycle |= DepCycle;
154  }
155 
156  /// Populates the partition with a transitive closure of all the
157  /// instructions that the seeded instructions dependent on.
158  void populateUsedSet() {
159  // FIXME: We currently don't use control-dependence but simply include all
160  // blocks (possibly empty at the end) and let simplifycfg mostly clean this
161  // up.
162  for (auto *B : OrigLoop->getBlocks())
163  Set.insert(B->getTerminator());
164 
165  // Follow the use-def chains to form a transitive closure of all the
166  // instructions that the originally seeded instructions depend on.
167  SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
168  while (!Worklist.empty()) {
169  Instruction *I = Worklist.pop_back_val();
170  // Insert instructions from the loop that we depend on.
171  for (Value *V : I->operand_values()) {
172  auto *I = dyn_cast<Instruction>(V);
173  if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
174  Worklist.push_back(I);
175  }
176  }
177  }
178 
179  /// Clones the original loop.
180  ///
181  /// Updates LoopInfo and DominatorTree using the information that block \p
182  /// LoopDomBB dominates the loop.
183  Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
184  unsigned Index, LoopInfo *LI,
185  DominatorTree *DT) {
186  ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
187  VMap, Twine(".ldist") + Twine(Index),
188  LI, DT, ClonedLoopBlocks);
189  return ClonedLoop;
190  }
191 
192  /// The cloned loop. If this partition is mapped to the original loop,
193  /// this is null.
194  const Loop *getClonedLoop() const { return ClonedLoop; }
195 
196  /// Returns the loop where this partition ends up after distribution.
197  /// If this partition is mapped to the original loop then use the block from
198  /// the loop.
199  Loop *getDistributedLoop() const {
200  return ClonedLoop ? ClonedLoop : OrigLoop;
201  }
202 
203  /// The VMap that is populated by cloning and then used in
204  /// remapinstruction to remap the cloned instructions.
205  ValueToValueMapTy &getVMap() { return VMap; }
206 
207  /// Remaps the cloned instructions using VMap.
208  void remapInstructions() {
209  remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
210  }
211 
212  /// Based on the set of instructions selected for this partition,
213  /// removes the unnecessary ones.
214  void removeUnusedInsts() {
216 
217  for (auto *Block : OrigLoop->getBlocks())
218  for (auto &Inst : *Block)
219  if (!Set.count(&Inst)) {
220  Instruction *NewInst = &Inst;
221  if (!VMap.empty())
222  NewInst = cast<Instruction>(VMap[NewInst]);
223 
224  assert(!isa<BranchInst>(NewInst) &&
225  "Branches are marked used early on");
226  Unused.push_back(NewInst);
227  }
228 
229  // Delete the instructions backwards, as it has a reduced likelihood of
230  // having to update as many def-use and use-def chains.
231  for (auto *Inst : reverse(Unused)) {
232  if (!Inst->use_empty())
233  Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType()));
234  Inst->eraseFromParent();
235  }
236  }
237 
238  void print() const {
239  if (DepCycle)
240  dbgs() << " (cycle)\n";
241  for (auto *I : Set)
242  // Prefix with the block name.
243  dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
244  }
245 
246  void printBlocks() const {
247  for (auto *BB : getDistributedLoop()->getBlocks())
248  dbgs() << *BB;
249  }
250 
251 private:
252  /// Instructions from OrigLoop selected for this partition.
253  InstructionSet Set;
254 
255  /// Whether this partition contains a dependence cycle.
256  bool DepCycle;
257 
258  /// The original loop.
259  Loop *OrigLoop;
260 
261  /// The cloned loop. If this partition is mapped to the original loop,
262  /// this is null.
263  Loop *ClonedLoop = nullptr;
264 
265  /// The blocks of ClonedLoop including the preheader. If this
266  /// partition is mapped to the original loop, this is empty.
267  SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
268 
269  /// These gets populated once the set of instructions have been
270  /// finalized. If this partition is mapped to the original loop, these are not
271  /// set.
272  ValueToValueMapTy VMap;
273 };
274 
275 /// Holds the set of Partitions. It populates them, merges them and then
276 /// clones the loops.
277 class InstPartitionContainer {
278  using InstToPartitionIdT = DenseMap<Instruction *, int>;
279 
280 public:
281  InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
282  : L(L), LI(LI), DT(DT) {}
283 
284  /// Returns the number of partitions.
285  unsigned getSize() const { return PartitionContainer.size(); }
286 
287  /// Adds \p Inst into the current partition if that is marked to
288  /// contain cycles. Otherwise start a new partition for it.
289  void addToCyclicPartition(Instruction *Inst) {
290  // If the current partition is non-cyclic. Start a new one.
291  if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
292  PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
293  else
294  PartitionContainer.back().add(Inst);
295  }
296 
297  /// Adds \p Inst into a partition that is not marked to contain
298  /// dependence cycles.
299  ///
300  // Initially we isolate memory instructions into as many partitions as
301  // possible, then later we may merge them back together.
302  void addToNewNonCyclicPartition(Instruction *Inst) {
303  PartitionContainer.emplace_back(Inst, L);
304  }
305 
306  /// Merges adjacent non-cyclic partitions.
307  ///
308  /// The idea is that we currently only want to isolate the non-vectorizable
309  /// partition. We could later allow more distribution among these partition
310  /// too.
311  void mergeAdjacentNonCyclic() {
312  mergeAdjacentPartitionsIf(
313  [](const InstPartition *P) { return !P->hasDepCycle(); });
314  }
315 
316  /// If a partition contains only conditional stores, we won't vectorize
317  /// it. Try to merge it with a previous cyclic partition.
318  void mergeNonIfConvertible() {
319  mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
320  if (Partition->hasDepCycle())
321  return true;
322 
323  // Now, check if all stores are conditional in this partition.
324  bool seenStore = false;
325 
326  for (auto *Inst : *Partition)
327  if (isa<StoreInst>(Inst)) {
328  seenStore = true;
330  return false;
331  }
332  return seenStore;
333  });
334  }
335 
336  /// Merges the partitions according to various heuristics.
337  void mergeBeforePopulating() {
338  mergeAdjacentNonCyclic();
340  mergeNonIfConvertible();
341  }
342 
343  /// Merges partitions in order to ensure that no loads are duplicated.
344  ///
345  /// We can't duplicate loads because that could potentially reorder them.
346  /// LoopAccessAnalysis provides dependency information with the context that
347  /// the order of memory operation is preserved.
348  ///
349  /// Return if any partitions were merged.
350  bool mergeToAvoidDuplicatedLoads() {
351  using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
352  using ToBeMergedT = EquivalenceClasses<InstPartition *>;
353 
354  LoadToPartitionT LoadToPartition;
355  ToBeMergedT ToBeMerged;
356 
357  // Step through the partitions and create equivalence between partitions
358  // that contain the same load. Also put partitions in between them in the
359  // same equivalence class to avoid reordering of memory operations.
360  for (PartitionContainerT::iterator I = PartitionContainer.begin(),
361  E = PartitionContainer.end();
362  I != E; ++I) {
363  auto *PartI = &*I;
364 
365  // If a load occurs in two partitions PartI and PartJ, merge all
366  // partitions (PartI, PartJ] into PartI.
367  for (Instruction *Inst : *PartI)
368  if (isa<LoadInst>(Inst)) {
369  bool NewElt;
370  LoadToPartitionT::iterator LoadToPart;
371 
372  std::tie(LoadToPart, NewElt) =
373  LoadToPartition.insert(std::make_pair(Inst, PartI));
374  if (!NewElt) {
375  LLVM_DEBUG(dbgs()
376  << "Merging partitions due to this load in multiple "
377  << "partitions: " << PartI << ", " << LoadToPart->second
378  << "\n"
379  << *Inst << "\n");
380 
381  auto PartJ = I;
382  do {
383  --PartJ;
384  ToBeMerged.unionSets(PartI, &*PartJ);
385  } while (&*PartJ != LoadToPart->second);
386  }
387  }
388  }
389  if (ToBeMerged.empty())
390  return false;
391 
392  // Merge the member of an equivalence class into its class leader. This
393  // makes the members empty.
394  for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
395  I != E; ++I) {
396  if (!I->isLeader())
397  continue;
398 
399  auto PartI = I->getData();
400  for (auto *PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
401  ToBeMerged.member_end())) {
402  PartJ->moveTo(*PartI);
403  }
404  }
405 
406  // Remove the empty partitions.
407  PartitionContainer.remove_if(
408  [](const InstPartition &P) { return P.empty(); });
409 
410  return true;
411  }
412 
413  /// Sets up the mapping between instructions to partitions. If the
414  /// instruction is duplicated across multiple partitions, set the entry to -1.
415  void setupPartitionIdOnInstructions() {
416  int PartitionID = 0;
417  for (const auto &Partition : PartitionContainer) {
418  for (Instruction *Inst : Partition) {
419  bool NewElt;
420  InstToPartitionIdT::iterator Iter;
421 
422  std::tie(Iter, NewElt) =
423  InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
424  if (!NewElt)
425  Iter->second = -1;
426  }
427  ++PartitionID;
428  }
429  }
430 
431  /// Populates the partition with everything that the seeding
432  /// instructions require.
433  void populateUsedSet() {
434  for (auto &P : PartitionContainer)
435  P.populateUsedSet();
436  }
437 
438  /// This performs the main chunk of the work of cloning the loops for
439  /// the partitions.
440  void cloneLoops() {
441  BasicBlock *OrigPH = L->getLoopPreheader();
442  // At this point the predecessor of the preheader is either the memcheck
443  // block or the top part of the original preheader.
444  BasicBlock *Pred = OrigPH->getSinglePredecessor();
445  assert(Pred && "Preheader does not have a single predecessor");
446  BasicBlock *ExitBlock = L->getExitBlock();
447  assert(ExitBlock && "No single exit block");
448  Loop *NewLoop;
449 
450  assert(!PartitionContainer.empty() && "at least two partitions expected");
451  // We're cloning the preheader along with the loop so we already made sure
452  // it was empty.
453  assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
454  "preheader not empty");
455 
456  // Preserve the original loop ID for use after the transformation.
457  MDNode *OrigLoopID = L->getLoopID();
458 
459  // Create a loop for each partition except the last. Clone the original
460  // loop before PH along with adding a preheader for the cloned loop. Then
461  // update PH to point to the newly added preheader.
462  BasicBlock *TopPH = OrigPH;
463  unsigned Index = getSize() - 1;
464  for (auto &Part : llvm::drop_begin(llvm::reverse(PartitionContainer))) {
465  NewLoop = Part.cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
466 
467  Part.getVMap()[ExitBlock] = TopPH;
468  Part.remapInstructions();
469  setNewLoopID(OrigLoopID, &Part);
470  --Index;
471  TopPH = NewLoop->getLoopPreheader();
472  }
473  Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
474 
475  // Also set a new loop ID for the last loop.
476  setNewLoopID(OrigLoopID, &PartitionContainer.back());
477 
478  // Now go in forward order and update the immediate dominator for the
479  // preheaders with the exiting block of the previous loop. Dominance
480  // within the loop is updated in cloneLoopWithPreheader.
481  for (auto Curr = PartitionContainer.cbegin(),
482  Next = std::next(PartitionContainer.cbegin()),
483  E = PartitionContainer.cend();
484  Next != E; ++Curr, ++Next)
486  Next->getDistributedLoop()->getLoopPreheader(),
487  Curr->getDistributedLoop()->getExitingBlock());
488  }
489 
490  /// Removes the dead instructions from the cloned loops.
491  void removeUnusedInsts() {
492  for (auto &Partition : PartitionContainer)
493  Partition.removeUnusedInsts();
494  }
495 
496  /// For each memory pointer, it computes the partitionId the pointer is
497  /// used in.
498  ///
499  /// This returns an array of int where the I-th entry corresponds to I-th
500  /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
501  /// partitions its entry is set to -1.
503  computePartitionSetForPointers(const LoopAccessInfo &LAI) {
504  const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
505 
506  unsigned N = RtPtrCheck->Pointers.size();
507  SmallVector<int, 8> PtrToPartitions(N);
508  for (unsigned I = 0; I < N; ++I) {
509  Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
510  auto Instructions =
511  LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
512 
513  int &Partition = PtrToPartitions[I];
514  // First set it to uninitialized.
515  Partition = -2;
516  for (Instruction *Inst : Instructions) {
517  // Note that this could be -1 if Inst is duplicated across multiple
518  // partitions.
519  int ThisPartition = this->InstToPartitionId[Inst];
520  if (Partition == -2)
521  Partition = ThisPartition;
522  // -1 means belonging to multiple partitions.
523  else if (Partition == -1)
524  break;
525  else if (Partition != (int)ThisPartition)
526  Partition = -1;
527  }
528  assert(Partition != -2 && "Pointer not belonging to any partition");
529  }
530 
531  return PtrToPartitions;
532  }
533 
534  void print(raw_ostream &OS) const {
535  unsigned Index = 0;
536  for (const auto &P : PartitionContainer) {
537  OS << "Partition " << Index++ << " (" << &P << "):\n";
538  P.print();
539  }
540  }
541 
542  void dump() const { print(dbgs()); }
543 
544 #ifndef NDEBUG
545  friend raw_ostream &operator<<(raw_ostream &OS,
546  const InstPartitionContainer &Partitions) {
547  Partitions.print(OS);
548  return OS;
549  }
550 #endif
551 
552  void printBlocks() const {
553  unsigned Index = 0;
554  for (const auto &P : PartitionContainer) {
555  dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
556  P.printBlocks();
557  }
558  }
559 
560 private:
561  using PartitionContainerT = std::list<InstPartition>;
562 
563  /// List of partitions.
564  PartitionContainerT PartitionContainer;
565 
566  /// Mapping from Instruction to partition Id. If the instruction
567  /// belongs to multiple partitions the entry contains -1.
568  InstToPartitionIdT InstToPartitionId;
569 
570  Loop *L;
571  LoopInfo *LI;
572  DominatorTree *DT;
573 
574  /// The control structure to merge adjacent partitions if both satisfy
575  /// the \p Predicate.
576  template <class UnaryPredicate>
577  void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
578  InstPartition *PrevMatch = nullptr;
579  for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
580  auto DoesMatch = Predicate(&*I);
581  if (PrevMatch == nullptr && DoesMatch) {
582  PrevMatch = &*I;
583  ++I;
584  } else if (PrevMatch != nullptr && DoesMatch) {
585  I->moveTo(*PrevMatch);
586  I = PartitionContainer.erase(I);
587  } else {
588  PrevMatch = nullptr;
589  ++I;
590  }
591  }
592  }
593 
594  /// Assign new LoopIDs for the partition's cloned loop.
595  void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
597  OrigLoopID,
599  Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
601  if (PartitionID) {
602  Loop *NewLoop = Part->getDistributedLoop();
603  NewLoop->setLoopID(PartitionID.value());
604  }
605  }
606 };
607 
608 /// For each memory instruction, this class maintains difference of the
609 /// number of unsafe dependences that start out from this instruction minus
610 /// those that end here.
611 ///
612 /// By traversing the memory instructions in program order and accumulating this
613 /// number, we know whether any unsafe dependence crosses over a program point.
614 class MemoryInstructionDependences {
616 
617 public:
618  struct Entry {
619  Instruction *Inst;
620  unsigned NumUnsafeDependencesStartOrEnd = 0;
621 
622  Entry(Instruction *Inst) : Inst(Inst) {}
623  };
624 
625  using AccessesType = SmallVector<Entry, 8>;
626 
627  AccessesType::const_iterator begin() const { return Accesses.begin(); }
628  AccessesType::const_iterator end() const { return Accesses.end(); }
629 
630  MemoryInstructionDependences(
631  const SmallVectorImpl<Instruction *> &Instructions,
632  const SmallVectorImpl<Dependence> &Dependences) {
633  Accesses.append(Instructions.begin(), Instructions.end());
634 
635  LLVM_DEBUG(dbgs() << "Backward dependences:\n");
636  for (const auto &Dep : Dependences)
637  if (Dep.isPossiblyBackward()) {
638  // Note that the designations source and destination follow the program
639  // order, i.e. source is always first. (The direction is given by the
640  // DepType.)
641  ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
642  --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
643 
644  LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
645  }
646  }
647 
648 private:
649  AccessesType Accesses;
650 };
651 
652 /// The actual class performing the per-loop work.
653 class LoopDistributeForLoop {
654 public:
655  LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
658  : L(L), F(F), LI(LI), DT(DT), SE(SE), LAIs(LAIs), ORE(ORE) {
659  setForced();
660  }
661 
662  /// Try to distribute an inner-most loop.
663  bool processLoop() {
664  assert(L->isInnermost() && "Only process inner loops.");
665 
666  LLVM_DEBUG(dbgs() << "\nLDist: In \""
667  << L->getHeader()->getParent()->getName()
668  << "\" checking " << *L << "\n");
669 
670  // Having a single exit block implies there's also one exiting block.
671  if (!L->getExitBlock())
672  return fail("MultipleExitBlocks", "multiple exit blocks");
673  if (!L->isLoopSimplifyForm())
674  return fail("NotLoopSimplifyForm",
675  "loop is not in loop-simplify form");
676  if (!L->isRotatedForm())
677  return fail("NotBottomTested", "loop is not bottom tested");
678 
679  BasicBlock *PH = L->getLoopPreheader();
680 
681  LAI = &LAIs.getInfo(*L);
682 
683  // Currently, we only distribute to isolate the part of the loop with
684  // dependence cycles to enable partial vectorization.
685  if (LAI->canVectorizeMemory())
686  return fail("MemOpsCanBeVectorized",
687  "memory operations are safe for vectorization");
688 
689  auto *Dependences = LAI->getDepChecker().getDependences();
690  if (!Dependences || Dependences->empty())
691  return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
692 
693  InstPartitionContainer Partitions(L, LI, DT);
694 
695  // First, go through each memory operation and assign them to consecutive
696  // partitions (the order of partitions follows program order). Put those
697  // with unsafe dependences into "cyclic" partition otherwise put each store
698  // in its own "non-cyclic" partition (we'll merge these later).
699  //
700  // Note that a memory operation (e.g. Load2 below) at a program point that
701  // has an unsafe dependence (Store3->Load1) spanning over it must be
702  // included in the same cyclic partition as the dependent operations. This
703  // is to preserve the original program order after distribution. E.g.:
704  //
705  // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
706  // Load1 -. 1 0->1
707  // Load2 | /Unsafe/ 0 1
708  // Store3 -' -1 1->0
709  // Load4 0 0
710  //
711  // NumUnsafeDependencesActive > 0 indicates this situation and in this case
712  // we just keep assigning to the same cyclic partition until
713  // NumUnsafeDependencesActive reaches 0.
714  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
715  MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
716  *Dependences);
717 
718  int NumUnsafeDependencesActive = 0;
719  for (const auto &InstDep : MID) {
720  Instruction *I = InstDep.Inst;
721  // We update NumUnsafeDependencesActive post-instruction, catch the
722  // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
723  if (NumUnsafeDependencesActive ||
724  InstDep.NumUnsafeDependencesStartOrEnd > 0)
725  Partitions.addToCyclicPartition(I);
726  else
727  Partitions.addToNewNonCyclicPartition(I);
728  NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
729  assert(NumUnsafeDependencesActive >= 0 &&
730  "Negative number of dependences active");
731  }
732 
733  // Add partitions for values used outside. These partitions can be out of
734  // order from the original program order. This is OK because if the
735  // partition uses a load we will merge this partition with the original
736  // partition of the load that we set up in the previous loop (see
737  // mergeToAvoidDuplicatedLoads).
738  auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
739  for (auto *Inst : DefsUsedOutside)
740  Partitions.addToNewNonCyclicPartition(Inst);
741 
742  LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
743  if (Partitions.getSize() < 2)
744  return fail("CantIsolateUnsafeDeps",
745  "cannot isolate unsafe dependencies");
746 
747  // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
748  // should be able to vectorize these together.
749  Partitions.mergeBeforePopulating();
750  LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
751  if (Partitions.getSize() < 2)
752  return fail("CantIsolateUnsafeDeps",
753  "cannot isolate unsafe dependencies");
754 
755  // Now, populate the partitions with non-memory operations.
756  Partitions.populateUsedSet();
757  LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
758 
759  // In order to preserve original lexical order for loads, keep them in the
760  // partition that we set up in the MemoryInstructionDependences loop.
761  if (Partitions.mergeToAvoidDuplicatedLoads()) {
762  LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
763  << Partitions);
764  if (Partitions.getSize() < 2)
765  return fail("CantIsolateUnsafeDeps",
766  "cannot isolate unsafe dependencies");
767  }
768 
769  // Don't distribute the loop if we need too many SCEV run-time checks, or
770  // any if it's illegal.
771  const SCEVPredicate &Pred = LAI->getPSE().getPredicate();
772  if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
773  return fail("RuntimeCheckWithConvergent",
774  "may not insert runtime check with convergent operation");
775  }
776 
777  if (Pred.getComplexity() > (IsForced.value_or(false)
780  return fail("TooManySCEVRuntimeChecks",
781  "too many SCEV run-time checks needed.\n");
782 
783  if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
784  return fail("HeuristicDisabled", "distribution heuristic disabled");
785 
786  LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
787  // We're done forming the partitions set up the reverse mapping from
788  // instructions to partitions.
789  Partitions.setupPartitionIdOnInstructions();
790 
791  // If we need run-time checks, version the loop now.
792  auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
793  const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
794  const auto &AllChecks = RtPtrChecking->getChecks();
795  auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
796  RtPtrChecking);
797 
798  if (LAI->hasConvergentOp() && !Checks.empty()) {
799  return fail("RuntimeCheckWithConvergent",
800  "may not insert runtime check with convergent operation");
801  }
802 
803  // To keep things simple have an empty preheader before we version or clone
804  // the loop. (Also split if this has no predecessor, i.e. entry, because we
805  // rely on PH having a predecessor.)
806  if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
807  SplitBlock(PH, PH->getTerminator(), DT, LI);
808 
809  if (!Pred.isAlwaysTrue() || !Checks.empty()) {
810  assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
811 
812  MDNode *OrigLoopID = L->getLoopID();
813 
814  LLVM_DEBUG(dbgs() << "\nPointers:\n");
816  LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
817  LVer.versionLoop(DefsUsedOutside);
818  LVer.annotateLoopWithNoAlias();
819 
820  // The unversioned loop will not be changed, so we inherit all attributes
821  // from the original loop, but remove the loop distribution metadata to
822  // avoid to distribute it again.
823  MDNode *UnversionedLoopID =
824  makeFollowupLoopID(OrigLoopID,
827  "llvm.loop.distribute.", true)
828  .value();
829  LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
830  }
831 
832  // Create identical copies of the original loop for each partition and hook
833  // them up sequentially.
834  Partitions.cloneLoops();
835 
836  // Now, we remove the instruction from each loop that don't belong to that
837  // partition.
838  Partitions.removeUnusedInsts();
839  LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
840  LLVM_DEBUG(Partitions.printBlocks());
841 
842  if (LDistVerify) {
843  LI->verify(*DT);
845  }
846 
847  ++NumLoopsDistributed;
848  // Report the success.
849  ORE->emit([&]() {
850  return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
851  L->getHeader())
852  << "distributed loop";
853  });
854  return true;
855  }
856 
857  /// Provide diagnostics then \return with false.
858  bool fail(StringRef RemarkName, StringRef Message) {
859  LLVMContext &Ctx = F->getContext();
860  bool Forced = isForced().value_or(false);
861 
862  LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
863 
864  // With Rpass-missed report that distribution failed.
865  ORE->emit([&]() {
866  return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
867  L->getStartLoc(), L->getHeader())
868  << "loop not distributed: use -Rpass-analysis=loop-distribute for "
869  "more "
870  "info";
871  });
872 
873  // With Rpass-analysis report why. This is on by default if distribution
874  // was requested explicitly.
875  ORE->emit(OptimizationRemarkAnalysis(
877  RemarkName, L->getStartLoc(), L->getHeader())
878  << "loop not distributed: " << Message);
879 
880  // Also issue a warning if distribution was requested explicitly but it
881  // failed.
882  if (Forced)
884  *F, L->getStartLoc(), "loop not distributed: failed "
885  "explicitly specified loop distribution"));
886 
887  return false;
888  }
889 
890  /// Return if distribution forced to be enabled/disabled for the loop.
891  ///
892  /// If the optional has a value, it indicates whether distribution was forced
893  /// to be enabled (true) or disabled (false). If the optional has no value
894  /// distribution was not forced either way.
895  const Optional<bool> &isForced() const { return IsForced; }
896 
897 private:
898  /// Filter out checks between pointers from the same partition.
899  ///
900  /// \p PtrToPartition contains the partition number for pointers. Partition
901  /// number -1 means that the pointer is used in multiple partitions. In this
902  /// case we can't safely omit the check.
903  SmallVector<RuntimePointerCheck, 4> includeOnlyCrossPartitionChecks(
904  const SmallVectorImpl<RuntimePointerCheck> &AllChecks,
905  const SmallVectorImpl<int> &PtrToPartition,
906  const RuntimePointerChecking *RtPtrChecking) {
908 
909  copy_if(AllChecks, std::back_inserter(Checks),
910  [&](const RuntimePointerCheck &Check) {
911  for (unsigned PtrIdx1 : Check.first->Members)
912  for (unsigned PtrIdx2 : Check.second->Members)
913  // Only include this check if there is a pair of pointers
914  // that require checking and the pointers fall into
915  // separate partitions.
916  //
917  // (Note that we already know at this point that the two
918  // pointer groups need checking but it doesn't follow
919  // that each pair of pointers within the two groups need
920  // checking as well.
921  //
922  // In other words we don't want to include a check just
923  // because there is a pair of pointers between the two
924  // pointer groups that require checks and a different
925  // pair whose pointers fall into different partitions.)
926  if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
928  PtrToPartition, PtrIdx1, PtrIdx2))
929  return true;
930  return false;
931  });
932 
933  return Checks;
934  }
935 
936  /// Check whether the loop metadata is forcing distribution to be
937  /// enabled/disabled.
938  void setForced() {
940  findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
941  if (!Value)
942  return;
943 
944  const MDOperand *Op = *Value;
945  assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
946  IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
947  }
948 
949  Loop *L;
950  Function *F;
951 
952  // Analyses used.
953  LoopInfo *LI;
954  const LoopAccessInfo *LAI = nullptr;
955  DominatorTree *DT;
956  ScalarEvolution *SE;
957  LoopAccessInfoManager &LAIs;
959 
960  /// Indicates whether distribution is forced to be enabled/disabled for
961  /// the loop.
962  ///
963  /// If the optional has a value, it indicates whether distribution was forced
964  /// to be enabled (true) or disabled (false). If the optional has no value
965  /// distribution was not forced either way.
966  Optional<bool> IsForced;
967 };
968 
969 } // end anonymous namespace
970 
971 /// Shared implementation between new and old PMs.
972 static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
974  LoopAccessInfoManager &LAIs) {
975  // Build up a worklist of inner-loops to vectorize. This is necessary as the
976  // act of distributing a loop creates new loops and can invalidate iterators
977  // across the loops.
978  SmallVector<Loop *, 8> Worklist;
979 
980  for (Loop *TopLevelLoop : *LI)
981  for (Loop *L : depth_first(TopLevelLoop))
982  // We only handle inner-most loops.
983  if (L->isInnermost())
984  Worklist.push_back(L);
985 
986  // Now walk the identified inner loops.
987  bool Changed = false;
988  for (Loop *L : Worklist) {
989  LoopDistributeForLoop LDL(L, &F, LI, DT, SE, LAIs, ORE);
990 
991  // If distribution was forced for the specific loop to be
992  // enabled/disabled, follow that. Otherwise use the global flag.
993  if (LDL.isForced().value_or(EnableLoopDistribute))
994  Changed |= LDL.processLoop();
995  }
996 
997  // Process each loop nest in the function.
998  return Changed;
999 }
1000 
1001 namespace {
1002 
1003 /// The pass class.
1004 class LoopDistributeLegacy : public FunctionPass {
1005 public:
1006  static char ID;
1007 
1008  LoopDistributeLegacy() : FunctionPass(ID) {
1009  // The default is set by the caller.
1011  }
1012 
1013  bool runOnFunction(Function &F) override {
1014  if (skipFunction(F))
1015  return false;
1016 
1017  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1018  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1019  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1020  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1021  auto &LAIs = getAnalysis<LoopAccessLegacyAnalysis>().getLAIs();
1022 
1023  return runImpl(F, LI, DT, SE, ORE, LAIs);
1024  }
1025 
1026  void getAnalysisUsage(AnalysisUsage &AU) const override {
1035  }
1036 };
1037 
1038 } // end anonymous namespace
1039 
1042  auto &LI = AM.getResult<LoopAnalysis>(F);
1043  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1044  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1046 
1048  bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, LAIs);
1049  if (!Changed)
1050  return PreservedAnalyses::all();
1051  PreservedAnalyses PA;
1052  PA.preserve<LoopAnalysis>();
1054  return PA;
1055 }
1056 
1058 
1059 static const char ldist_name[] = "Loop Distribution";
1060 
1061 INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
1062  false)
1068 INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
1069 
1070 FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
AssumptionCache.h
llvm::MemoryDepChecker
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
Definition: LoopAccessAnalysis.h:87
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:734
DistributeSCEVCheckThreshold
static cl::opt< unsigned > DistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution"))
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2158
llvm::LoopAccessLegacyAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:796
llvm::minidump::StreamType::Unused
@ Unused
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::sys::path::const_iterator::end
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::RuntimePointerCheck
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
Definition: LoopAccessAnalysis.h:369
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:387
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:107
Optional.h
llvm::PredicatedScalarEvolution::getPredicate
const SCEVPredicate & getPredicate() const
Definition: ScalarEvolution.cpp:14603
ValueMapper.h
llvm::LoopAccessInfoManager
Definition: LoopAccessAnalysis.h:768
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
Metadata.h
llvm::cloneLoopWithPreheader
Loop * cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl< BasicBlock * > &Blocks)
Clones a loop OrigLoop.
Definition: CloneFunction.cpp:940
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
Scalar.h
llvm::hasDisableAllTransformsHint
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:345
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::MipsISD::LDL
@ LDL
Definition: MipsISelLowering.h:252
llvm::SmallVector< Instruction *, 8 >
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:699
Statistic.h
llvm::EquivalenceClasses
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Definition: EquivalenceClasses.h:60
LoopAccessAnalysis.h
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:631
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::makeFollowupLoopID
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:264
llvm::Dependence
Dependence - This class represents a dependence between two memory memory references in a function.
Definition: DependenceAnalysis.h:71
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
GlobalsModRef.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::LoopAccessAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:828
ScalarEvolution.h
llvm::findDefsUsedOutsideOfLoop
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:124
DenseMap.h
Instructions
Code Generation Notes for reduce the size of the ISel and reduce repetition in the implementation In a small number of this can cause even when no optimisation has taken place Instructions
Definition: MSA.txt:11
llvm::RuntimePointerChecking::needsChecking
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
Definition: LoopAccessAnalysis.cpp:358
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
PragmaDistributeSCEVCheckThreshold
static cl::opt< unsigned > PragmaDistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold-with-pragma", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution for loop marked with #pragma loop distribute(enable)"))
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
llvm::Optional
Definition: APInt.h:33
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
LLVMLoopDistributeFollowupFallback
static const char *const LLVMLoopDistributeFollowupFallback
Definition: LoopDistribute.cpp:87
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:285
STLExtras.h
LLVMLoopDistributeFollowupAll
static const char *const LLVMLoopDistributeFollowupAll
Definition: LoopDistribute.cpp:81
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
llvm::initializeLoopDistributeLegacyPass
void initializeLoopDistributeLegacyPass(PassRegistry &)
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::SCEVPredicate
This class represents an assumption made using SCEV expressions which can be checked at run-time.
Definition: ScalarEvolution.h:215
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
LoopAnalysisManager.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
CommandLine.h
ldist_name
static const char ldist_name[]
Definition: LoopDistribute.cpp:1059
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
Constants.h
EnableLoopDistribute
static cl::opt< bool > EnableLoopDistribute("enable-loop-distribute", cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass"), cl::init(false))
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2518
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Twine.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:306
llvm::remapInstructionsInBlocks
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
Definition: CloneFunction.cpp:926
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
Check
#define Check(C,...)
Definition: Lint.cpp:170
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:141
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:291
LoopUtils.h
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2188
SmallPtrSet.h
llvm::createLoopDistributePass
FunctionPass * createLoopDistributePass()
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
llvm::MemoryDepChecker::getDependences
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Definition: LoopAccessAnalysis.h:221
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::LoopAccessInfo::hasConvergentOp
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
Definition: LoopAccessAnalysis.h:574
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
DistributeNonIfConvertible
static cl::opt< bool > DistributeNonIfConvertible("loop-distribute-non-if-convertible", cl::Hidden, cl::desc("Whether to distribute into a loop that may not be " "if-convertible by the loop vectorizer"), cl::init(false))
LDIST_NAME
#define LDIST_NAME
Definition: LoopDistribute.cpp:76
llvm::findStringMetadataForLoop
Optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
Definition: LoopInfo.cpp:1052
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:479
BasicBlock.h
llvm::cl::opt< bool >
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::RuntimePointerChecking
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Definition: LoopAccessAnalysis.h:385
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
llvm::MemoryDepChecker::Dependence
Dependece between memory access instructions.
Definition: LoopAccessAnalysis.h:108
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
llvm::LoopVersioning
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Definition: LoopVersioning.h:41
llvm::ARM_AM::add
@ add
Definition: ARMAddressingModes.h:39
llvm::LoopAccessInfo::getInstructionsForAccess
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
Definition: LoopAccessAnalysis.h:608
llvm::RuntimePointerChecking::getChecks
const SmallVectorImpl< RuntimePointerCheck > & getChecks() const
Returns the checks that generateChecks created.
Definition: LoopAccessAnalysis.h:447
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::DenseMap
Definition: DenseMap.h:714
fail
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
Definition: BPFISelLowering.cpp:38
llvm::LoopAccessInfo::getRuntimePointerChecking
const RuntimePointerChecking * getRuntimePointerChecking() const
Definition: LoopAccessAnalysis.h:576
I
#define I(x, y, z)
Definition: MD5.cpp:58
Cloning.h
runImpl
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, LoopAccessInfoManager &LAIs)
Shared implementation between new and old PMs.
Definition: LoopDistribute.cpp:972
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:562
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
llvm::sys::path::const_iterator::begin
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:226
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MemoryDepChecker::getMemoryInstructions
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
Definition: LoopAccessAnalysis.h:229
llvm::LoopDistributePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopDistribute.cpp:1040
llvm::LoopAccessInfo::canVectorizeMemory
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
Definition: LoopAccessAnalysis.h:569
llvm::SCEVPredicate::isAlwaysTrue
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
iterator_range.h
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::print
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Definition: GCNRegPressure.cpp:138
llvm::LoopAccessInfo::getDepChecker
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
Definition: LoopAccessAnalysis.h:604
LLVMLoopDistributeFollowupSequential
static const char *const LLVMLoopDistributeFollowupSequential
Definition: LoopDistribute.cpp:85
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::SCEVPredicate::getComplexity
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
Definition: ScalarEvolution.h:238
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:780
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::ValueMap< const Value *, WeakTrackingVH >
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
llvm::User::replaceUsesOfWith
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:230
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:85
llvm::RuntimePointerChecking::Pointers
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
Definition: LoopAccessAnalysis.h:483
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:182
llvm::RuntimePointerChecking::arePointersInSamePartition
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Definition: LoopAccessAnalysis.cpp:546
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:248
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::copy_if
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1780
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:525
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
LDistVerify
static cl::opt< bool > LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification " "after Loop Distribution"), cl::init(false))
llvm::RuntimePointerChecking::printChecks
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Definition: LoopAccessAnalysis.cpp:572
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:501
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
PassManager.h
EquivalenceClasses.h
llvm::DiagnosticInfoOptimizationFailure
Diagnostic information for optimization failures.
Definition: DiagnosticInfo.h:976
llvm::OptimizationRemarkAnalysis::AlwaysPrint
static const char * AlwaysPrint
Definition: DiagnosticInfo.h:820
LoopVersioning.h
LoopDistribute.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) FunctionPass *llvm
Definition: LoopDistribute.cpp:1061
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:689
Instructions.h
SmallVector.h
Dominators.h
LLVMLoopDistributeFollowupCoincident
static const char *const LLVMLoopDistributeFollowupCoincident
Definition: LoopDistribute.cpp:83
N
#define N
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:142
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
TargetTransformInfo.h
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::Optional::value
constexpr const T & value() const &
Definition: Optional.h:281
llvm::SmallVectorImpl< Instruction * >
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:485
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
llvm::Loop::isRotatedForm
bool isRotatedForm() const
Return true if the loop is in rotated form.
Definition: LoopInfo.h:810
llvm::cl::desc
Definition: CommandLine.h:413
llvm::LoopAccessInfo::getPSE
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
Definition: LoopAccessAnalysis.h:639
raw_ostream.h
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:918
Value.h
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::MDOperand
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:773
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1268
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1251
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1732