LLVM  10.0.0svn
LoopDistribute.cpp
Go to the documentation of this file.
1 //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Loop Distribution Pass. Its main focus is to
10 // distribute loops that cannot be vectorized due to dependence cycles. It
11 // tries to isolate the offending dependences into a new loop allowing
12 // vectorization of the remaining parts.
13 //
14 // For dependence analysis, the pass uses the LoopVectorizer's
15 // LoopAccessAnalysis. Because this analysis presumes no change in the order of
16 // memory operations, special care is taken to preserve the lexical order of
17 // these operations.
18 //
19 // Similarly to the Vectorizer, the pass also supports loop versioning to
20 // run-time disambiguate potentially overlapping arrays.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/Optional.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/ADT/Twine.h"
41 #include "llvm/Analysis/LoopInfo.h"
46 #include "llvm/IR/BasicBlock.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DiagnosticInfo.h"
49 #include "llvm/IR/Dominators.h"
50 #include "llvm/IR/Function.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/Metadata.h"
56 #include "llvm/IR/PassManager.h"
57 #include "llvm/IR/Value.h"
58 #include "llvm/Pass.h"
59 #include "llvm/Support/Casting.h"
61 #include "llvm/Support/Debug.h"
63 #include "llvm/Transforms/Scalar.h"
69 #include <cassert>
70 #include <functional>
71 #include <list>
72 #include <tuple>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 #define LDIST_NAME "loop-distribute"
78 #define DEBUG_TYPE LDIST_NAME
79 
80 /// @{
81 /// Metadata attribute names
82 static const char *const LLVMLoopDistributeFollowupAll =
83  "llvm.loop.distribute.followup_all";
84 static const char *const LLVMLoopDistributeFollowupCoincident =
85  "llvm.loop.distribute.followup_coincident";
86 static const char *const LLVMLoopDistributeFollowupSequential =
87  "llvm.loop.distribute.followup_sequential";
88 static const char *const LLVMLoopDistributeFollowupFallback =
89  "llvm.loop.distribute.followup_fallback";
90 /// @}
91 
92 static cl::opt<bool>
93  LDistVerify("loop-distribute-verify", cl::Hidden,
94  cl::desc("Turn on DominatorTree and LoopInfo verification "
95  "after Loop Distribution"),
96  cl::init(false));
97 
99  "loop-distribute-non-if-convertible", cl::Hidden,
100  cl::desc("Whether to distribute into a loop that may not be "
101  "if-convertible by the loop vectorizer"),
102  cl::init(false));
103 
105  "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
106  cl::desc("The maximum number of SCEV checks allowed for Loop "
107  "Distribution"));
108 
110  "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
111  cl::Hidden,
112  cl::desc(
113  "The maximum number of SCEV checks allowed for Loop "
114  "Distribution for loop marked with #pragma loop distribute(enable)"));
115 
117  "enable-loop-distribute", cl::Hidden,
118  cl::desc("Enable the new, experimental LoopDistribution Pass"),
119  cl::init(false));
120 
121 STATISTIC(NumLoopsDistributed, "Number of loops distributed");
122 
123 namespace {
124 
125 /// Maintains the set of instructions of the loop for a partition before
126 /// cloning. After cloning, it hosts the new loop.
127 class InstPartition {
128  using InstructionSet = SmallPtrSet<Instruction *, 8>;
129 
130 public:
131  InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
132  : DepCycle(DepCycle), OrigLoop(L) {
133  Set.insert(I);
134  }
135 
136  /// Returns whether this partition contains a dependence cycle.
137  bool hasDepCycle() const { return DepCycle; }
138 
139  /// Adds an instruction to this partition.
140  void add(Instruction *I) { Set.insert(I); }
141 
142  /// Collection accessors.
143  InstructionSet::iterator begin() { return Set.begin(); }
144  InstructionSet::iterator end() { return Set.end(); }
145  InstructionSet::const_iterator begin() const { return Set.begin(); }
146  InstructionSet::const_iterator end() const { return Set.end(); }
147  bool empty() const { return Set.empty(); }
148 
149  /// Moves this partition into \p Other. This partition becomes empty
150  /// after this.
151  void moveTo(InstPartition &Other) {
152  Other.Set.insert(Set.begin(), Set.end());
153  Set.clear();
154  Other.DepCycle |= DepCycle;
155  }
156 
157  /// Populates the partition with a transitive closure of all the
158  /// instructions that the seeded instructions dependent on.
159  void populateUsedSet() {
160  // FIXME: We currently don't use control-dependence but simply include all
161  // blocks (possibly empty at the end) and let simplifycfg mostly clean this
162  // up.
163  for (auto *B : OrigLoop->getBlocks())
164  Set.insert(B->getTerminator());
165 
166  // Follow the use-def chains to form a transitive closure of all the
167  // instructions that the originally seeded instructions depend on.
168  SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
169  while (!Worklist.empty()) {
170  Instruction *I = Worklist.pop_back_val();
171  // Insert instructions from the loop that we depend on.
172  for (Value *V : I->operand_values()) {
173  auto *I = dyn_cast<Instruction>(V);
174  if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
175  Worklist.push_back(I);
176  }
177  }
178  }
179 
180  /// Clones the original loop.
181  ///
182  /// Updates LoopInfo and DominatorTree using the information that block \p
183  /// LoopDomBB dominates the loop.
184  Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
185  unsigned Index, LoopInfo *LI,
186  DominatorTree *DT) {
187  ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
188  VMap, Twine(".ldist") + Twine(Index),
189  LI, DT, ClonedLoopBlocks);
190  return ClonedLoop;
191  }
192 
193  /// The cloned loop. If this partition is mapped to the original loop,
194  /// this is null.
195  const Loop *getClonedLoop() const { return ClonedLoop; }
196 
197  /// Returns the loop where this partition ends up after distribution.
198  /// If this partition is mapped to the original loop then use the block from
199  /// the loop.
200  Loop *getDistributedLoop() const {
201  return ClonedLoop ? ClonedLoop : OrigLoop;
202  }
203 
204  /// The VMap that is populated by cloning and then used in
205  /// remapinstruction to remap the cloned instructions.
206  ValueToValueMapTy &getVMap() { return VMap; }
207 
208  /// Remaps the cloned instructions using VMap.
209  void remapInstructions() {
210  remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
211  }
212 
213  /// Based on the set of instructions selected for this partition,
214  /// removes the unnecessary ones.
215  void removeUnusedInsts() {
217 
218  for (auto *Block : OrigLoop->getBlocks())
219  for (auto &Inst : *Block)
220  if (!Set.count(&Inst)) {
221  Instruction *NewInst = &Inst;
222  if (!VMap.empty())
223  NewInst = cast<Instruction>(VMap[NewInst]);
224 
225  assert(!isa<BranchInst>(NewInst) &&
226  "Branches are marked used early on");
227  Unused.push_back(NewInst);
228  }
229 
230  // Delete the instructions backwards, as it has a reduced likelihood of
231  // having to update as many def-use and use-def chains.
232  for (auto *Inst : reverse(Unused)) {
233  if (!Inst->use_empty())
234  Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
235  Inst->eraseFromParent();
236  }
237  }
238 
239  void print() const {
240  if (DepCycle)
241  dbgs() << " (cycle)\n";
242  for (auto *I : Set)
243  // Prefix with the block name.
244  dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
245  }
246 
247  void printBlocks() const {
248  for (auto *BB : getDistributedLoop()->getBlocks())
249  dbgs() << *BB;
250  }
251 
252 private:
253  /// Instructions from OrigLoop selected for this partition.
254  InstructionSet Set;
255 
256  /// Whether this partition contains a dependence cycle.
257  bool DepCycle;
258 
259  /// The original loop.
260  Loop *OrigLoop;
261 
262  /// The cloned loop. If this partition is mapped to the original loop,
263  /// this is null.
264  Loop *ClonedLoop = nullptr;
265 
266  /// The blocks of ClonedLoop including the preheader. If this
267  /// partition is mapped to the original loop, this is empty.
268  SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
269 
270  /// These gets populated once the set of instructions have been
271  /// finalized. If this partition is mapped to the original loop, these are not
272  /// set.
273  ValueToValueMapTy VMap;
274 };
275 
276 /// Holds the set of Partitions. It populates them, merges them and then
277 /// clones the loops.
278 class InstPartitionContainer {
279  using InstToPartitionIdT = DenseMap<Instruction *, int>;
280 
281 public:
282  InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
283  : L(L), LI(LI), DT(DT) {}
284 
285  /// Returns the number of partitions.
286  unsigned getSize() const { return PartitionContainer.size(); }
287 
288  /// Adds \p Inst into the current partition if that is marked to
289  /// contain cycles. Otherwise start a new partition for it.
290  void addToCyclicPartition(Instruction *Inst) {
291  // If the current partition is non-cyclic. Start a new one.
292  if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
293  PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
294  else
295  PartitionContainer.back().add(Inst);
296  }
297 
298  /// Adds \p Inst into a partition that is not marked to contain
299  /// dependence cycles.
300  ///
301  // Initially we isolate memory instructions into as many partitions as
302  // possible, then later we may merge them back together.
303  void addToNewNonCyclicPartition(Instruction *Inst) {
304  PartitionContainer.emplace_back(Inst, L);
305  }
306 
307  /// Merges adjacent non-cyclic partitions.
308  ///
309  /// The idea is that we currently only want to isolate the non-vectorizable
310  /// partition. We could later allow more distribution among these partition
311  /// too.
312  void mergeAdjacentNonCyclic() {
313  mergeAdjacentPartitionsIf(
314  [](const InstPartition *P) { return !P->hasDepCycle(); });
315  }
316 
317  /// If a partition contains only conditional stores, we won't vectorize
318  /// it. Try to merge it with a previous cyclic partition.
319  void mergeNonIfConvertible() {
320  mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
321  if (Partition->hasDepCycle())
322  return true;
323 
324  // Now, check if all stores are conditional in this partition.
325  bool seenStore = false;
326 
327  for (auto *Inst : *Partition)
328  if (isa<StoreInst>(Inst)) {
329  seenStore = true;
331  return false;
332  }
333  return seenStore;
334  });
335  }
336 
337  /// Merges the partitions according to various heuristics.
338  void mergeBeforePopulating() {
339  mergeAdjacentNonCyclic();
341  mergeNonIfConvertible();
342  }
343 
344  /// Merges partitions in order to ensure that no loads are duplicated.
345  ///
346  /// We can't duplicate loads because that could potentially reorder them.
347  /// LoopAccessAnalysis provides dependency information with the context that
348  /// the order of memory operation is preserved.
349  ///
350  /// Return if any partitions were merged.
351  bool mergeToAvoidDuplicatedLoads() {
352  using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
353  using ToBeMergedT = EquivalenceClasses<InstPartition *>;
354 
355  LoadToPartitionT LoadToPartition;
356  ToBeMergedT ToBeMerged;
357 
358  // Step through the partitions and create equivalence between partitions
359  // that contain the same load. Also put partitions in between them in the
360  // same equivalence class to avoid reordering of memory operations.
361  for (PartitionContainerT::iterator I = PartitionContainer.begin(),
362  E = PartitionContainer.end();
363  I != E; ++I) {
364  auto *PartI = &*I;
365 
366  // If a load occurs in two partitions PartI and PartJ, merge all
367  // partitions (PartI, PartJ] into PartI.
368  for (Instruction *Inst : *PartI)
369  if (isa<LoadInst>(Inst)) {
370  bool NewElt;
371  LoadToPartitionT::iterator LoadToPart;
372 
373  std::tie(LoadToPart, NewElt) =
374  LoadToPartition.insert(std::make_pair(Inst, PartI));
375  if (!NewElt) {
376  LLVM_DEBUG(dbgs()
377  << "Merging partitions due to this load in multiple "
378  << "partitions: " << PartI << ", " << LoadToPart->second
379  << "\n"
380  << *Inst << "\n");
381 
382  auto PartJ = I;
383  do {
384  --PartJ;
385  ToBeMerged.unionSets(PartI, &*PartJ);
386  } while (&*PartJ != LoadToPart->second);
387  }
388  }
389  }
390  if (ToBeMerged.empty())
391  return false;
392 
393  // Merge the member of an equivalence class into its class leader. This
394  // makes the members empty.
395  for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
396  I != E; ++I) {
397  if (!I->isLeader())
398  continue;
399 
400  auto PartI = I->getData();
401  for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
402  ToBeMerged.member_end())) {
403  PartJ->moveTo(*PartI);
404  }
405  }
406 
407  // Remove the empty partitions.
408  PartitionContainer.remove_if(
409  [](const InstPartition &P) { return P.empty(); });
410 
411  return true;
412  }
413 
414  /// Sets up the mapping between instructions to partitions. If the
415  /// instruction is duplicated across multiple partitions, set the entry to -1.
416  void setupPartitionIdOnInstructions() {
417  int PartitionID = 0;
418  for (const auto &Partition : PartitionContainer) {
419  for (Instruction *Inst : Partition) {
420  bool NewElt;
421  InstToPartitionIdT::iterator Iter;
422 
423  std::tie(Iter, NewElt) =
424  InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
425  if (!NewElt)
426  Iter->second = -1;
427  }
428  ++PartitionID;
429  }
430  }
431 
432  /// Populates the partition with everything that the seeding
433  /// instructions require.
434  void populateUsedSet() {
435  for (auto &P : PartitionContainer)
436  P.populateUsedSet();
437  }
438 
439  /// This performs the main chunk of the work of cloning the loops for
440  /// the partitions.
441  void cloneLoops() {
442  BasicBlock *OrigPH = L->getLoopPreheader();
443  // At this point the predecessor of the preheader is either the memcheck
444  // block or the top part of the original preheader.
445  BasicBlock *Pred = OrigPH->getSinglePredecessor();
446  assert(Pred && "Preheader does not have a single predecessor");
447  BasicBlock *ExitBlock = L->getExitBlock();
448  assert(ExitBlock && "No single exit block");
449  Loop *NewLoop;
450 
451  assert(!PartitionContainer.empty() && "at least two partitions expected");
452  // We're cloning the preheader along with the loop so we already made sure
453  // it was empty.
454  assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
455  "preheader not empty");
456 
457  // Preserve the original loop ID for use after the transformation.
458  MDNode *OrigLoopID = L->getLoopID();
459 
460  // Create a loop for each partition except the last. Clone the original
461  // loop before PH along with adding a preheader for the cloned loop. Then
462  // update PH to point to the newly added preheader.
463  BasicBlock *TopPH = OrigPH;
464  unsigned Index = getSize() - 1;
465  for (auto I = std::next(PartitionContainer.rbegin()),
466  E = PartitionContainer.rend();
467  I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
468  auto *Part = &*I;
469 
470  NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
471 
472  Part->getVMap()[ExitBlock] = TopPH;
473  Part->remapInstructions();
474  setNewLoopID(OrigLoopID, Part);
475  }
476  Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
477 
478  // Also set a new loop ID for the last loop.
479  setNewLoopID(OrigLoopID, &PartitionContainer.back());
480 
481  // Now go in forward order and update the immediate dominator for the
482  // preheaders with the exiting block of the previous loop. Dominance
483  // within the loop is updated in cloneLoopWithPreheader.
484  for (auto Curr = PartitionContainer.cbegin(),
485  Next = std::next(PartitionContainer.cbegin()),
486  E = PartitionContainer.cend();
487  Next != E; ++Curr, ++Next)
489  Next->getDistributedLoop()->getLoopPreheader(),
490  Curr->getDistributedLoop()->getExitingBlock());
491  }
492 
493  /// Removes the dead instructions from the cloned loops.
494  void removeUnusedInsts() {
495  for (auto &Partition : PartitionContainer)
496  Partition.removeUnusedInsts();
497  }
498 
499  /// For each memory pointer, it computes the partitionId the pointer is
500  /// used in.
501  ///
502  /// This returns an array of int where the I-th entry corresponds to I-th
503  /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
504  /// partitions its entry is set to -1.
506  computePartitionSetForPointers(const LoopAccessInfo &LAI) {
507  const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
508 
509  unsigned N = RtPtrCheck->Pointers.size();
510  SmallVector<int, 8> PtrToPartitions(N);
511  for (unsigned I = 0; I < N; ++I) {
512  Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
513  auto Instructions =
514  LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
515 
516  int &Partition = PtrToPartitions[I];
517  // First set it to uninitialized.
518  Partition = -2;
519  for (Instruction *Inst : Instructions) {
520  // Note that this could be -1 if Inst is duplicated across multiple
521  // partitions.
522  int ThisPartition = this->InstToPartitionId[Inst];
523  if (Partition == -2)
524  Partition = ThisPartition;
525  // -1 means belonging to multiple partitions.
526  else if (Partition == -1)
527  break;
528  else if (Partition != (int)ThisPartition)
529  Partition = -1;
530  }
531  assert(Partition != -2 && "Pointer not belonging to any partition");
532  }
533 
534  return PtrToPartitions;
535  }
536 
537  void print(raw_ostream &OS) const {
538  unsigned Index = 0;
539  for (const auto &P : PartitionContainer) {
540  OS << "Partition " << Index++ << " (" << &P << "):\n";
541  P.print();
542  }
543  }
544 
545  void dump() const { print(dbgs()); }
546 
547 #ifndef NDEBUG
548  friend raw_ostream &operator<<(raw_ostream &OS,
549  const InstPartitionContainer &Partitions) {
550  Partitions.print(OS);
551  return OS;
552  }
553 #endif
554 
555  void printBlocks() const {
556  unsigned Index = 0;
557  for (const auto &P : PartitionContainer) {
558  dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
559  P.printBlocks();
560  }
561  }
562 
563 private:
564  using PartitionContainerT = std::list<InstPartition>;
565 
566  /// List of partitions.
567  PartitionContainerT PartitionContainer;
568 
569  /// Mapping from Instruction to partition Id. If the instruction
570  /// belongs to multiple partitions the entry contains -1.
571  InstToPartitionIdT InstToPartitionId;
572 
573  Loop *L;
574  LoopInfo *LI;
575  DominatorTree *DT;
576 
577  /// The control structure to merge adjacent partitions if both satisfy
578  /// the \p Predicate.
579  template <class UnaryPredicate>
580  void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
581  InstPartition *PrevMatch = nullptr;
582  for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
583  auto DoesMatch = Predicate(&*I);
584  if (PrevMatch == nullptr && DoesMatch) {
585  PrevMatch = &*I;
586  ++I;
587  } else if (PrevMatch != nullptr && DoesMatch) {
588  I->moveTo(*PrevMatch);
589  I = PartitionContainer.erase(I);
590  } else {
591  PrevMatch = nullptr;
592  ++I;
593  }
594  }
595  }
596 
597  /// Assign new LoopIDs for the partition's cloned loop.
598  void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
600  OrigLoopID,
602  Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
603  : LLVMLoopDistributeFollowupCoincident});
604  if (PartitionID.hasValue()) {
605  Loop *NewLoop = Part->getDistributedLoop();
606  NewLoop->setLoopID(PartitionID.getValue());
607  }
608  }
609 };
610 
611 /// For each memory instruction, this class maintains difference of the
612 /// number of unsafe dependences that start out from this instruction minus
613 /// those that end here.
614 ///
615 /// By traversing the memory instructions in program order and accumulating this
616 /// number, we know whether any unsafe dependence crosses over a program point.
617 class MemoryInstructionDependences {
619 
620 public:
621  struct Entry {
622  Instruction *Inst;
623  unsigned NumUnsafeDependencesStartOrEnd = 0;
624 
625  Entry(Instruction *Inst) : Inst(Inst) {}
626  };
627 
628  using AccessesType = SmallVector<Entry, 8>;
629 
630  AccessesType::const_iterator begin() const { return Accesses.begin(); }
631  AccessesType::const_iterator end() const { return Accesses.end(); }
632 
633  MemoryInstructionDependences(
634  const SmallVectorImpl<Instruction *> &Instructions,
635  const SmallVectorImpl<Dependence> &Dependences) {
636  Accesses.append(Instructions.begin(), Instructions.end());
637 
638  LLVM_DEBUG(dbgs() << "Backward dependences:\n");
639  for (auto &Dep : Dependences)
640  if (Dep.isPossiblyBackward()) {
641  // Note that the designations source and destination follow the program
642  // order, i.e. source is always first. (The direction is given by the
643  // DepType.)
644  ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
645  --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
646 
647  LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
648  }
649  }
650 
651 private:
652  AccessesType Accesses;
653 };
654 
655 /// The actual class performing the per-loop work.
656 class LoopDistributeForLoop {
657 public:
658  LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
660  : L(L), F(F), LI(LI), DT(DT), SE(SE), ORE(ORE) {
661  setForced();
662  }
663 
664  /// Try to distribute an inner-most loop.
665  bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
666  assert(L->empty() && "Only process inner loops.");
667 
668  LLVM_DEBUG(dbgs() << "\nLDist: In \""
669  << L->getHeader()->getParent()->getName()
670  << "\" checking " << *L << "\n");
671 
672  if (!L->getExitBlock())
673  return fail("MultipleExitBlocks", "multiple exit blocks");
674  if (!L->isLoopSimplifyForm())
675  return fail("NotLoopSimplifyForm",
676  "loop is not in loop-simplify form");
677 
678  BasicBlock *PH = L->getLoopPreheader();
679 
680  // LAA will check that we only have a single exiting block.
681  LAI = &GetLAA(*L);
682 
683  // Currently, we only distribute to isolate the part of the loop with
684  // dependence cycles to enable partial vectorization.
685  if (LAI->canVectorizeMemory())
686  return fail("MemOpsCanBeVectorized",
687  "memory operations are safe for vectorization");
688 
689  auto *Dependences = LAI->getDepChecker().getDependences();
690  if (!Dependences || Dependences->empty())
691  return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
692 
693  InstPartitionContainer Partitions(L, LI, DT);
694 
695  // First, go through each memory operation and assign them to consecutive
696  // partitions (the order of partitions follows program order). Put those
697  // with unsafe dependences into "cyclic" partition otherwise put each store
698  // in its own "non-cyclic" partition (we'll merge these later).
699  //
700  // Note that a memory operation (e.g. Load2 below) at a program point that
701  // has an unsafe dependence (Store3->Load1) spanning over it must be
702  // included in the same cyclic partition as the dependent operations. This
703  // is to preserve the original program order after distribution. E.g.:
704  //
705  // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
706  // Load1 -. 1 0->1
707  // Load2 | /Unsafe/ 0 1
708  // Store3 -' -1 1->0
709  // Load4 0 0
710  //
711  // NumUnsafeDependencesActive > 0 indicates this situation and in this case
712  // we just keep assigning to the same cyclic partition until
713  // NumUnsafeDependencesActive reaches 0.
714  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
715  MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
716  *Dependences);
717 
718  int NumUnsafeDependencesActive = 0;
719  for (auto &InstDep : MID) {
720  Instruction *I = InstDep.Inst;
721  // We update NumUnsafeDependencesActive post-instruction, catch the
722  // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
723  if (NumUnsafeDependencesActive ||
724  InstDep.NumUnsafeDependencesStartOrEnd > 0)
725  Partitions.addToCyclicPartition(I);
726  else
727  Partitions.addToNewNonCyclicPartition(I);
728  NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
729  assert(NumUnsafeDependencesActive >= 0 &&
730  "Negative number of dependences active");
731  }
732 
733  // Add partitions for values used outside. These partitions can be out of
734  // order from the original program order. This is OK because if the
735  // partition uses a load we will merge this partition with the original
736  // partition of the load that we set up in the previous loop (see
737  // mergeToAvoidDuplicatedLoads).
738  auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
739  for (auto *Inst : DefsUsedOutside)
740  Partitions.addToNewNonCyclicPartition(Inst);
741 
742  LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
743  if (Partitions.getSize() < 2)
744  return fail("CantIsolateUnsafeDeps",
745  "cannot isolate unsafe dependencies");
746 
747  // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
748  // should be able to vectorize these together.
749  Partitions.mergeBeforePopulating();
750  LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
751  if (Partitions.getSize() < 2)
752  return fail("CantIsolateUnsafeDeps",
753  "cannot isolate unsafe dependencies");
754 
755  // Now, populate the partitions with non-memory operations.
756  Partitions.populateUsedSet();
757  LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
758 
759  // In order to preserve original lexical order for loads, keep them in the
760  // partition that we set up in the MemoryInstructionDependences loop.
761  if (Partitions.mergeToAvoidDuplicatedLoads()) {
762  LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
763  << Partitions);
764  if (Partitions.getSize() < 2)
765  return fail("CantIsolateUnsafeDeps",
766  "cannot isolate unsafe dependencies");
767  }
768 
769  // Don't distribute the loop if we need too many SCEV run-time checks, or
770  // any if it's illegal.
771  const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
772  if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
773  return fail("RuntimeCheckWithConvergent",
774  "may not insert runtime check with convergent operation");
775  }
776 
777  if (Pred.getComplexity() > (IsForced.getValueOr(false)
780  return fail("TooManySCEVRuntimeChecks",
781  "too many SCEV run-time checks needed.\n");
782 
783  if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
784  return fail("HeuristicDisabled", "distribution heuristic disabled");
785 
786  LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
787  // We're done forming the partitions set up the reverse mapping from
788  // instructions to partitions.
789  Partitions.setupPartitionIdOnInstructions();
790 
791  // To keep things simple have an empty preheader before we version or clone
792  // the loop. (Also split if this has no predecessor, i.e. entry, because we
793  // rely on PH having a predecessor.)
794  if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
795  SplitBlock(PH, PH->getTerminator(), DT, LI);
796 
797  // If we need run-time checks, version the loop now.
798  auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
799  const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
800  const auto &AllChecks = RtPtrChecking->getChecks();
801  auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
802  RtPtrChecking);
803 
804  if (LAI->hasConvergentOp() && !Checks.empty()) {
805  return fail("RuntimeCheckWithConvergent",
806  "may not insert runtime check with convergent operation");
807  }
808 
809  if (!Pred.isAlwaysTrue() || !Checks.empty()) {
810  assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
811 
812  MDNode *OrigLoopID = L->getLoopID();
813 
814  LLVM_DEBUG(dbgs() << "\nPointers:\n");
816  LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
817  LVer.setAliasChecks(std::move(Checks));
818  LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
819  LVer.versionLoop(DefsUsedOutside);
820  LVer.annotateLoopWithNoAlias();
821 
822  // The unversioned loop will not be changed, so we inherit all attributes
823  // from the original loop, but remove the loop distribution metadata to
824  // avoid to distribute it again.
825  MDNode *UnversionedLoopID =
826  makeFollowupLoopID(OrigLoopID,
828  LLVMLoopDistributeFollowupFallback},
829  "llvm.loop.distribute.", true)
830  .getValue();
831  LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
832  }
833 
834  // Create identical copies of the original loop for each partition and hook
835  // them up sequentially.
836  Partitions.cloneLoops();
837 
838  // Now, we remove the instruction from each loop that don't belong to that
839  // partition.
840  Partitions.removeUnusedInsts();
841  LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
842  LLVM_DEBUG(Partitions.printBlocks());
843 
844  if (LDistVerify) {
845  LI->verify(*DT);
847  }
848 
849  ++NumLoopsDistributed;
850  // Report the success.
851  ORE->emit([&]() {
852  return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
853  L->getHeader())
854  << "distributed loop";
855  });
856  return true;
857  }
858 
859  /// Provide diagnostics then \return with false.
860  bool fail(StringRef RemarkName, StringRef Message) {
861  LLVMContext &Ctx = F->getContext();
862  bool Forced = isForced().getValueOr(false);
863 
864  LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
865 
866  // With Rpass-missed report that distribution failed.
867  ORE->emit([&]() {
868  return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
869  L->getStartLoc(), L->getHeader())
870  << "loop not distributed: use -Rpass-analysis=loop-distribute for "
871  "more "
872  "info";
873  });
874 
875  // With Rpass-analysis report why. This is on by default if distribution
876  // was requested explicitly.
877  ORE->emit(OptimizationRemarkAnalysis(
879  RemarkName, L->getStartLoc(), L->getHeader())
880  << "loop not distributed: " << Message);
881 
882  // Also issue a warning if distribution was requested explicitly but it
883  // failed.
884  if (Forced)
886  *F, L->getStartLoc(), "loop not distributed: failed "
887  "explicitly specified loop distribution"));
888 
889  return false;
890  }
891 
892  /// Return if distribution forced to be enabled/disabled for the loop.
893  ///
894  /// If the optional has a value, it indicates whether distribution was forced
895  /// to be enabled (true) or disabled (false). If the optional has no value
896  /// distribution was not forced either way.
897  const Optional<bool> &isForced() const { return IsForced; }
898 
899 private:
900  /// Filter out checks between pointers from the same partition.
901  ///
902  /// \p PtrToPartition contains the partition number for pointers. Partition
903  /// number -1 means that the pointer is used in multiple partitions. In this
904  /// case we can't safely omit the check.
906  includeOnlyCrossPartitionChecks(
908  const SmallVectorImpl<int> &PtrToPartition,
909  const RuntimePointerChecking *RtPtrChecking) {
911 
912  copy_if(AllChecks, std::back_inserter(Checks),
914  for (unsigned PtrIdx1 : Check.first->Members)
915  for (unsigned PtrIdx2 : Check.second->Members)
916  // Only include this check if there is a pair of pointers
917  // that require checking and the pointers fall into
918  // separate partitions.
919  //
920  // (Note that we already know at this point that the two
921  // pointer groups need checking but it doesn't follow
922  // that each pair of pointers within the two groups need
923  // checking as well.
924  //
925  // In other words we don't want to include a check just
926  // because there is a pair of pointers between the two
927  // pointer groups that require checks and a different
928  // pair whose pointers fall into different partitions.)
929  if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
931  PtrToPartition, PtrIdx1, PtrIdx2))
932  return true;
933  return false;
934  });
935 
936  return Checks;
937  }
938 
939  /// Check whether the loop metadata is forcing distribution to be
940  /// enabled/disabled.
941  void setForced() {
943  findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
944  if (!Value)
945  return;
946 
947  const MDOperand *Op = *Value;
948  assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
949  IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
950  }
951 
952  Loop *L;
953  Function *F;
954 
955  // Analyses used.
956  LoopInfo *LI;
957  const LoopAccessInfo *LAI = nullptr;
958  DominatorTree *DT;
959  ScalarEvolution *SE;
961 
962  /// Indicates whether distribution is forced to be enabled/disabled for
963  /// the loop.
964  ///
965  /// If the optional has a value, it indicates whether distribution was forced
966  /// to be enabled (true) or disabled (false). If the optional has no value
967  /// distribution was not forced either way.
968  Optional<bool> IsForced;
969 };
970 
971 } // end anonymous namespace
972 
973 /// Shared implementation between new and old PMs.
974 static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
976  std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
977  // Build up a worklist of inner-loops to vectorize. This is necessary as the
978  // act of distributing a loop creates new loops and can invalidate iterators
979  // across the loops.
980  SmallVector<Loop *, 8> Worklist;
981 
982  for (Loop *TopLevelLoop : *LI)
983  for (Loop *L : depth_first(TopLevelLoop))
984  // We only handle inner-most loops.
985  if (L->empty())
986  Worklist.push_back(L);
987 
988  // Now walk the identified inner loops.
989  bool Changed = false;
990  for (Loop *L : Worklist) {
991  LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
992 
993  // If distribution was forced for the specific loop to be
994  // enabled/disabled, follow that. Otherwise use the global flag.
995  if (LDL.isForced().getValueOr(EnableLoopDistribute))
996  Changed |= LDL.processLoop(GetLAA);
997  }
998 
999  // Process each loop nest in the function.
1000  return Changed;
1001 }
1002 
1003 namespace {
1004 
1005 /// The pass class.
1006 class LoopDistributeLegacy : public FunctionPass {
1007 public:
1008  static char ID;
1009 
1010  LoopDistributeLegacy() : FunctionPass(ID) {
1011  // The default is set by the caller.
1013  }
1014 
1015  bool runOnFunction(Function &F) override {
1016  if (skipFunction(F))
1017  return false;
1018 
1019  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1020  auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
1021  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1022  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1023  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1024  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
1025  [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
1026 
1027  return runImpl(F, LI, DT, SE, ORE, GetLAA);
1028  }
1029 
1030  void getAnalysisUsage(AnalysisUsage &AU) const override {
1039  }
1040 };
1041 
1042 } // end anonymous namespace
1043 
1046  auto &LI = AM.getResult<LoopAnalysis>(F);
1047  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1048  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1050 
1051  // We don't directly need these analyses but they're required for loop
1052  // analyses so provide them below.
1053  auto &AA = AM.getResult<AAManager>(F);
1054  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1055  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1056  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1057 
1058  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
1059  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
1060  [&](Loop &L) -> const LoopAccessInfo & {
1061  LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
1062  return LAM.getResult<LoopAccessAnalysis>(L, AR);
1063  };
1064 
1065  bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA);
1066  if (!Changed)
1067  return PreservedAnalyses::all();
1068  PreservedAnalyses PA;
1069  PA.preserve<LoopAnalysis>();
1071  PA.preserve<GlobalsAA>();
1072  return PA;
1073 }
1074 
1076 
1077 static const char ldist_name[] = "Loop Distribution";
1078 
1079 INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
1080  false)
1086 INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
1087 
1088 FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
Legacy wrapper pass to provide the GlobalsAAResult object.
static bool Check(DecodeStatus &Out, DecodeStatus In)
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:710
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Diagnostic information for missed-optimization remarks.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
#define LDIST_NAME
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This is the interface for a simple mod/ref and alias analysis over globals.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1239
static cl::opt< unsigned > PragmaDistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold-with-pragma", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution for loop marked with #pragma loop distribute(enable)"))
const RuntimePointerChecking * getRuntimePointerChecking() const
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
This file contains the declarations for metadata subclasses.
static cl::opt< bool > LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification " "after Loop Distribution"), cl::init(false))
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
Analysis pass providing the TargetTransformInfo.
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:863
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:683
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
static const char *const LLVMLoopDistributeFollowupSequential
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
Diagnostic information for optimization failures.
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Diagnostic information for optimization analysis remarks.
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:224
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1152
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:118
BlockT * getHeader() const
Definition: LoopInfo.h:102
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:273
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
static const char *const LLVMLoopDistributeFollowupCoincident
static const char *const LLVMLoopDistributeFollowupFallback
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:474
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
static cl::opt< bool > DistributeNonIfConvertible("loop-distribute-non-if-convertible", cl::Hidden, cl::desc("Whether to distribute into a loop that may not be " "if-convertible by the loop vectorizer"), cl::init(false))
This header provides classes for managing per-loop analyses.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:20
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, std::function< const LoopAccessInfo &(Loop &)> &GetLAA)
Shared implementation between new and old PMs.
static bool runOnFunction(Function &F, bool PostInlining)
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:233
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This analysis provides dependence information for the memory accesses of a loop.
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) FunctionPass *llvm
This file contains the declarations for the subclasses of Constant, which represent the different fla...
A manager for alias analyses.
Diagnostic information for applied optimization remarks.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Represent the analysis usage information of a pass.
Analysis pass providing a never-invalidated alias analysis result.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:75
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1436
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:572
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
static cl::opt< unsigned > DistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution"))
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:209
A function analysis which provides an AssumptionCache.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
const SCEVUnionPredicate & getUnionPredicate() const
static uint64_t add(uint64_t LeftOp, uint64_t RightOp)
Definition: FileCheck.cpp:207
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
void initializeLoopDistributeLegacyPass(PassRegistry &)
Drive the analysis of memory accesses in the loop.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can&#39;t ove...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:250
static const char *const LLVMLoopDistributeFollowupAll
static const char ldist_name[]
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Analysis pass that exposes the ScalarEvolution for a function.
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union...
bool hasValue() const
Definition: Optional.h:259
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:425
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:450
This analysis provides dependence information for the memory accesses of a loop.
Dependece between memory access instructions.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:506
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
Definition: LoopUtils.cpp:199
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
iterator_range< value_op_iterator > operand_values()
Definition: User.h:261
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2038
bool empty() const
Definition: LoopInfo.h:148
Analysis pass providing the TargetLibraryInfo.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This class represents a composition of other SCEV predicates, and is the class that most clients will...
static cl::opt< bool > EnableLoopDistribute("enable-loop-distribute", cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass"), cl::init(false))
LLVM Value Representation.
Definition: Value.h:72
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
OptimizationRemarkEmitter legacy analysis pass.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
Loop * cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl< BasicBlock *> &Blocks)
Clones a loop OrigLoop.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:1177
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:331
const SmallVector< PointerCheck, 4 > & getChecks() const
Returns the checks that generateChecks created.
Dependence - This class represents a dependence between two memory memory references in a function...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
The optimization diagnostic interface.
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles...
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
FunctionPass * createLoopDistributePass()
const BasicBlock * getParent() const
Definition: Instruction.h:66
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock *> &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1044