LLVM  15.0.0git
LoopDistribute.cpp
Go to the documentation of this file.
1 //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Loop Distribution Pass. Its main focus is to
10 // distribute loops that cannot be vectorized due to dependence cycles. It
11 // tries to isolate the offending dependences into a new loop allowing
12 // vectorization of the remaining parts.
13 //
14 // For dependence analysis, the pass uses the LoopVectorizer's
15 // LoopAccessAnalysis. Because this analysis presumes no change in the order of
16 // memory operations, special care is taken to preserve the lexical order of
17 // these operations.
18 //
19 // Similarly to the Vectorizer, the pass also supports loop versioning to
20 // run-time disambiguate potentially overlapping arrays.
21 //
22 //===----------------------------------------------------------------------===//
23 
25 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/Optional.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/ADT/StringRef.h"
34 #include "llvm/ADT/Twine.h"
40 #include "llvm/Analysis/LoopInfo.h"
45 #include "llvm/IR/BasicBlock.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DiagnosticInfo.h"
48 #include "llvm/IR/Dominators.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/Instruction.h"
51 #include "llvm/IR/Instructions.h"
52 #include "llvm/IR/LLVMContext.h"
53 #include "llvm/IR/Metadata.h"
54 #include "llvm/IR/PassManager.h"
55 #include "llvm/IR/Value.h"
56 #include "llvm/InitializePasses.h"
57 #include "llvm/Pass.h"
58 #include "llvm/Support/Casting.h"
60 #include "llvm/Support/Debug.h"
62 #include "llvm/Transforms/Scalar.h"
68 #include <cassert>
69 #include <functional>
70 #include <list>
71 #include <tuple>
72 #include <utility>
73 
74 using namespace llvm;
75 
76 #define LDIST_NAME "loop-distribute"
77 #define DEBUG_TYPE LDIST_NAME
78 
79 /// @{
80 /// Metadata attribute names
81 static const char *const LLVMLoopDistributeFollowupAll =
82  "llvm.loop.distribute.followup_all";
83 static const char *const LLVMLoopDistributeFollowupCoincident =
84  "llvm.loop.distribute.followup_coincident";
85 static const char *const LLVMLoopDistributeFollowupSequential =
86  "llvm.loop.distribute.followup_sequential";
87 static const char *const LLVMLoopDistributeFollowupFallback =
88  "llvm.loop.distribute.followup_fallback";
89 /// @}
90 
91 static cl::opt<bool>
92  LDistVerify("loop-distribute-verify", cl::Hidden,
93  cl::desc("Turn on DominatorTree and LoopInfo verification "
94  "after Loop Distribution"),
95  cl::init(false));
96 
98  "loop-distribute-non-if-convertible", cl::Hidden,
99  cl::desc("Whether to distribute into a loop that may not be "
100  "if-convertible by the loop vectorizer"),
101  cl::init(false));
102 
104  "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
105  cl::desc("The maximum number of SCEV checks allowed for Loop "
106  "Distribution"));
107 
109  "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
110  cl::Hidden,
111  cl::desc(
112  "The maximum number of SCEV checks allowed for Loop "
113  "Distribution for loop marked with #pragma loop distribute(enable)"));
114 
116  "enable-loop-distribute", cl::Hidden,
117  cl::desc("Enable the new, experimental LoopDistribution Pass"),
118  cl::init(false));
119 
120 STATISTIC(NumLoopsDistributed, "Number of loops distributed");
121 
122 namespace {
123 
124 /// Maintains the set of instructions of the loop for a partition before
125 /// cloning. After cloning, it hosts the new loop.
126 class InstPartition {
127  using InstructionSet = SmallPtrSet<Instruction *, 8>;
128 
129 public:
130  InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
131  : DepCycle(DepCycle), OrigLoop(L) {
132  Set.insert(I);
133  }
134 
135  /// Returns whether this partition contains a dependence cycle.
136  bool hasDepCycle() const { return DepCycle; }
137 
138  /// Adds an instruction to this partition.
139  void add(Instruction *I) { Set.insert(I); }
140 
141  /// Collection accessors.
142  InstructionSet::iterator begin() { return Set.begin(); }
143  InstructionSet::iterator end() { return Set.end(); }
144  InstructionSet::const_iterator begin() const { return Set.begin(); }
145  InstructionSet::const_iterator end() const { return Set.end(); }
146  bool empty() const { return Set.empty(); }
147 
148  /// Moves this partition into \p Other. This partition becomes empty
149  /// after this.
150  void moveTo(InstPartition &Other) {
151  Other.Set.insert(Set.begin(), Set.end());
152  Set.clear();
153  Other.DepCycle |= DepCycle;
154  }
155 
156  /// Populates the partition with a transitive closure of all the
157  /// instructions that the seeded instructions dependent on.
158  void populateUsedSet() {
159  // FIXME: We currently don't use control-dependence but simply include all
160  // blocks (possibly empty at the end) and let simplifycfg mostly clean this
161  // up.
162  for (auto *B : OrigLoop->getBlocks())
163  Set.insert(B->getTerminator());
164 
165  // Follow the use-def chains to form a transitive closure of all the
166  // instructions that the originally seeded instructions depend on.
167  SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
168  while (!Worklist.empty()) {
169  Instruction *I = Worklist.pop_back_val();
170  // Insert instructions from the loop that we depend on.
171  for (Value *V : I->operand_values()) {
172  auto *I = dyn_cast<Instruction>(V);
173  if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
174  Worklist.push_back(I);
175  }
176  }
177  }
178 
179  /// Clones the original loop.
180  ///
181  /// Updates LoopInfo and DominatorTree using the information that block \p
182  /// LoopDomBB dominates the loop.
183  Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
184  unsigned Index, LoopInfo *LI,
185  DominatorTree *DT) {
186  ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
187  VMap, Twine(".ldist") + Twine(Index),
188  LI, DT, ClonedLoopBlocks);
189  return ClonedLoop;
190  }
191 
192  /// The cloned loop. If this partition is mapped to the original loop,
193  /// this is null.
194  const Loop *getClonedLoop() const { return ClonedLoop; }
195 
196  /// Returns the loop where this partition ends up after distribution.
197  /// If this partition is mapped to the original loop then use the block from
198  /// the loop.
199  Loop *getDistributedLoop() const {
200  return ClonedLoop ? ClonedLoop : OrigLoop;
201  }
202 
203  /// The VMap that is populated by cloning and then used in
204  /// remapinstruction to remap the cloned instructions.
205  ValueToValueMapTy &getVMap() { return VMap; }
206 
207  /// Remaps the cloned instructions using VMap.
208  void remapInstructions() {
209  remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
210  }
211 
212  /// Based on the set of instructions selected for this partition,
213  /// removes the unnecessary ones.
214  void removeUnusedInsts() {
216 
217  for (auto *Block : OrigLoop->getBlocks())
218  for (auto &Inst : *Block)
219  if (!Set.count(&Inst)) {
220  Instruction *NewInst = &Inst;
221  if (!VMap.empty())
222  NewInst = cast<Instruction>(VMap[NewInst]);
223 
224  assert(!isa<BranchInst>(NewInst) &&
225  "Branches are marked used early on");
226  Unused.push_back(NewInst);
227  }
228 
229  // Delete the instructions backwards, as it has a reduced likelihood of
230  // having to update as many def-use and use-def chains.
231  for (auto *Inst : reverse(Unused)) {
232  if (!Inst->use_empty())
233  Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
234  Inst->eraseFromParent();
235  }
236  }
237 
238  void print() const {
239  if (DepCycle)
240  dbgs() << " (cycle)\n";
241  for (auto *I : Set)
242  // Prefix with the block name.
243  dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
244  }
245 
246  void printBlocks() const {
247  for (auto *BB : getDistributedLoop()->getBlocks())
248  dbgs() << *BB;
249  }
250 
251 private:
252  /// Instructions from OrigLoop selected for this partition.
253  InstructionSet Set;
254 
255  /// Whether this partition contains a dependence cycle.
256  bool DepCycle;
257 
258  /// The original loop.
259  Loop *OrigLoop;
260 
261  /// The cloned loop. If this partition is mapped to the original loop,
262  /// this is null.
263  Loop *ClonedLoop = nullptr;
264 
265  /// The blocks of ClonedLoop including the preheader. If this
266  /// partition is mapped to the original loop, this is empty.
267  SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
268 
269  /// These gets populated once the set of instructions have been
270  /// finalized. If this partition is mapped to the original loop, these are not
271  /// set.
272  ValueToValueMapTy VMap;
273 };
274 
275 /// Holds the set of Partitions. It populates them, merges them and then
276 /// clones the loops.
277 class InstPartitionContainer {
278  using InstToPartitionIdT = DenseMap<Instruction *, int>;
279 
280 public:
281  InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
282  : L(L), LI(LI), DT(DT) {}
283 
284  /// Returns the number of partitions.
285  unsigned getSize() const { return PartitionContainer.size(); }
286 
287  /// Adds \p Inst into the current partition if that is marked to
288  /// contain cycles. Otherwise start a new partition for it.
289  void addToCyclicPartition(Instruction *Inst) {
290  // If the current partition is non-cyclic. Start a new one.
291  if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
292  PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
293  else
294  PartitionContainer.back().add(Inst);
295  }
296 
297  /// Adds \p Inst into a partition that is not marked to contain
298  /// dependence cycles.
299  ///
300  // Initially we isolate memory instructions into as many partitions as
301  // possible, then later we may merge them back together.
302  void addToNewNonCyclicPartition(Instruction *Inst) {
303  PartitionContainer.emplace_back(Inst, L);
304  }
305 
306  /// Merges adjacent non-cyclic partitions.
307  ///
308  /// The idea is that we currently only want to isolate the non-vectorizable
309  /// partition. We could later allow more distribution among these partition
310  /// too.
311  void mergeAdjacentNonCyclic() {
312  mergeAdjacentPartitionsIf(
313  [](const InstPartition *P) { return !P->hasDepCycle(); });
314  }
315 
316  /// If a partition contains only conditional stores, we won't vectorize
317  /// it. Try to merge it with a previous cyclic partition.
318  void mergeNonIfConvertible() {
319  mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
320  if (Partition->hasDepCycle())
321  return true;
322 
323  // Now, check if all stores are conditional in this partition.
324  bool seenStore = false;
325 
326  for (auto *Inst : *Partition)
327  if (isa<StoreInst>(Inst)) {
328  seenStore = true;
330  return false;
331  }
332  return seenStore;
333  });
334  }
335 
336  /// Merges the partitions according to various heuristics.
337  void mergeBeforePopulating() {
338  mergeAdjacentNonCyclic();
340  mergeNonIfConvertible();
341  }
342 
343  /// Merges partitions in order to ensure that no loads are duplicated.
344  ///
345  /// We can't duplicate loads because that could potentially reorder them.
346  /// LoopAccessAnalysis provides dependency information with the context that
347  /// the order of memory operation is preserved.
348  ///
349  /// Return if any partitions were merged.
350  bool mergeToAvoidDuplicatedLoads() {
351  using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
352  using ToBeMergedT = EquivalenceClasses<InstPartition *>;
353 
354  LoadToPartitionT LoadToPartition;
355  ToBeMergedT ToBeMerged;
356 
357  // Step through the partitions and create equivalence between partitions
358  // that contain the same load. Also put partitions in between them in the
359  // same equivalence class to avoid reordering of memory operations.
360  for (PartitionContainerT::iterator I = PartitionContainer.begin(),
361  E = PartitionContainer.end();
362  I != E; ++I) {
363  auto *PartI = &*I;
364 
365  // If a load occurs in two partitions PartI and PartJ, merge all
366  // partitions (PartI, PartJ] into PartI.
367  for (Instruction *Inst : *PartI)
368  if (isa<LoadInst>(Inst)) {
369  bool NewElt;
370  LoadToPartitionT::iterator LoadToPart;
371 
372  std::tie(LoadToPart, NewElt) =
373  LoadToPartition.insert(std::make_pair(Inst, PartI));
374  if (!NewElt) {
375  LLVM_DEBUG(dbgs()
376  << "Merging partitions due to this load in multiple "
377  << "partitions: " << PartI << ", " << LoadToPart->second
378  << "\n"
379  << *Inst << "\n");
380 
381  auto PartJ = I;
382  do {
383  --PartJ;
384  ToBeMerged.unionSets(PartI, &*PartJ);
385  } while (&*PartJ != LoadToPart->second);
386  }
387  }
388  }
389  if (ToBeMerged.empty())
390  return false;
391 
392  // Merge the member of an equivalence class into its class leader. This
393  // makes the members empty.
394  for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
395  I != E; ++I) {
396  if (!I->isLeader())
397  continue;
398 
399  auto PartI = I->getData();
400  for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
401  ToBeMerged.member_end())) {
402  PartJ->moveTo(*PartI);
403  }
404  }
405 
406  // Remove the empty partitions.
407  PartitionContainer.remove_if(
408  [](const InstPartition &P) { return P.empty(); });
409 
410  return true;
411  }
412 
413  /// Sets up the mapping between instructions to partitions. If the
414  /// instruction is duplicated across multiple partitions, set the entry to -1.
415  void setupPartitionIdOnInstructions() {
416  int PartitionID = 0;
417  for (const auto &Partition : PartitionContainer) {
418  for (Instruction *Inst : Partition) {
419  bool NewElt;
420  InstToPartitionIdT::iterator Iter;
421 
422  std::tie(Iter, NewElt) =
423  InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
424  if (!NewElt)
425  Iter->second = -1;
426  }
427  ++PartitionID;
428  }
429  }
430 
431  /// Populates the partition with everything that the seeding
432  /// instructions require.
433  void populateUsedSet() {
434  for (auto &P : PartitionContainer)
435  P.populateUsedSet();
436  }
437 
438  /// This performs the main chunk of the work of cloning the loops for
439  /// the partitions.
440  void cloneLoops() {
441  BasicBlock *OrigPH = L->getLoopPreheader();
442  // At this point the predecessor of the preheader is either the memcheck
443  // block or the top part of the original preheader.
444  BasicBlock *Pred = OrigPH->getSinglePredecessor();
445  assert(Pred && "Preheader does not have a single predecessor");
446  BasicBlock *ExitBlock = L->getExitBlock();
447  assert(ExitBlock && "No single exit block");
448  Loop *NewLoop;
449 
450  assert(!PartitionContainer.empty() && "at least two partitions expected");
451  // We're cloning the preheader along with the loop so we already made sure
452  // it was empty.
453  assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
454  "preheader not empty");
455 
456  // Preserve the original loop ID for use after the transformation.
457  MDNode *OrigLoopID = L->getLoopID();
458 
459  // Create a loop for each partition except the last. Clone the original
460  // loop before PH along with adding a preheader for the cloned loop. Then
461  // update PH to point to the newly added preheader.
462  BasicBlock *TopPH = OrigPH;
463  unsigned Index = getSize() - 1;
464  for (auto I = std::next(PartitionContainer.rbegin()),
465  E = PartitionContainer.rend();
466  I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
467  auto *Part = &*I;
468 
469  NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
470 
471  Part->getVMap()[ExitBlock] = TopPH;
472  Part->remapInstructions();
473  setNewLoopID(OrigLoopID, Part);
474  }
475  Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
476 
477  // Also set a new loop ID for the last loop.
478  setNewLoopID(OrigLoopID, &PartitionContainer.back());
479 
480  // Now go in forward order and update the immediate dominator for the
481  // preheaders with the exiting block of the previous loop. Dominance
482  // within the loop is updated in cloneLoopWithPreheader.
483  for (auto Curr = PartitionContainer.cbegin(),
484  Next = std::next(PartitionContainer.cbegin()),
485  E = PartitionContainer.cend();
486  Next != E; ++Curr, ++Next)
488  Next->getDistributedLoop()->getLoopPreheader(),
489  Curr->getDistributedLoop()->getExitingBlock());
490  }
491 
492  /// Removes the dead instructions from the cloned loops.
493  void removeUnusedInsts() {
494  for (auto &Partition : PartitionContainer)
495  Partition.removeUnusedInsts();
496  }
497 
498  /// For each memory pointer, it computes the partitionId the pointer is
499  /// used in.
500  ///
501  /// This returns an array of int where the I-th entry corresponds to I-th
502  /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
503  /// partitions its entry is set to -1.
505  computePartitionSetForPointers(const LoopAccessInfo &LAI) {
506  const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
507 
508  unsigned N = RtPtrCheck->Pointers.size();
509  SmallVector<int, 8> PtrToPartitions(N);
510  for (unsigned I = 0; I < N; ++I) {
511  Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
512  auto Instructions =
513  LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
514 
515  int &Partition = PtrToPartitions[I];
516  // First set it to uninitialized.
517  Partition = -2;
518  for (Instruction *Inst : Instructions) {
519  // Note that this could be -1 if Inst is duplicated across multiple
520  // partitions.
521  int ThisPartition = this->InstToPartitionId[Inst];
522  if (Partition == -2)
523  Partition = ThisPartition;
524  // -1 means belonging to multiple partitions.
525  else if (Partition == -1)
526  break;
527  else if (Partition != (int)ThisPartition)
528  Partition = -1;
529  }
530  assert(Partition != -2 && "Pointer not belonging to any partition");
531  }
532 
533  return PtrToPartitions;
534  }
535 
536  void print(raw_ostream &OS) const {
537  unsigned Index = 0;
538  for (const auto &P : PartitionContainer) {
539  OS << "Partition " << Index++ << " (" << &P << "):\n";
540  P.print();
541  }
542  }
543 
544  void dump() const { print(dbgs()); }
545 
546 #ifndef NDEBUG
547  friend raw_ostream &operator<<(raw_ostream &OS,
548  const InstPartitionContainer &Partitions) {
549  Partitions.print(OS);
550  return OS;
551  }
552 #endif
553 
554  void printBlocks() const {
555  unsigned Index = 0;
556  for (const auto &P : PartitionContainer) {
557  dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
558  P.printBlocks();
559  }
560  }
561 
562 private:
563  using PartitionContainerT = std::list<InstPartition>;
564 
565  /// List of partitions.
566  PartitionContainerT PartitionContainer;
567 
568  /// Mapping from Instruction to partition Id. If the instruction
569  /// belongs to multiple partitions the entry contains -1.
570  InstToPartitionIdT InstToPartitionId;
571 
572  Loop *L;
573  LoopInfo *LI;
574  DominatorTree *DT;
575 
576  /// The control structure to merge adjacent partitions if both satisfy
577  /// the \p Predicate.
578  template <class UnaryPredicate>
579  void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
580  InstPartition *PrevMatch = nullptr;
581  for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
582  auto DoesMatch = Predicate(&*I);
583  if (PrevMatch == nullptr && DoesMatch) {
584  PrevMatch = &*I;
585  ++I;
586  } else if (PrevMatch != nullptr && DoesMatch) {
587  I->moveTo(*PrevMatch);
588  I = PartitionContainer.erase(I);
589  } else {
590  PrevMatch = nullptr;
591  ++I;
592  }
593  }
594  }
595 
596  /// Assign new LoopIDs for the partition's cloned loop.
597  void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
599  OrigLoopID,
601  Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
603  if (PartitionID.hasValue()) {
604  Loop *NewLoop = Part->getDistributedLoop();
605  NewLoop->setLoopID(PartitionID.getValue());
606  }
607  }
608 };
609 
610 /// For each memory instruction, this class maintains difference of the
611 /// number of unsafe dependences that start out from this instruction minus
612 /// those that end here.
613 ///
614 /// By traversing the memory instructions in program order and accumulating this
615 /// number, we know whether any unsafe dependence crosses over a program point.
616 class MemoryInstructionDependences {
618 
619 public:
620  struct Entry {
621  Instruction *Inst;
622  unsigned NumUnsafeDependencesStartOrEnd = 0;
623 
624  Entry(Instruction *Inst) : Inst(Inst) {}
625  };
626 
627  using AccessesType = SmallVector<Entry, 8>;
628 
629  AccessesType::const_iterator begin() const { return Accesses.begin(); }
630  AccessesType::const_iterator end() const { return Accesses.end(); }
631 
632  MemoryInstructionDependences(
634  const SmallVectorImpl<Dependence> &Dependences) {
635  Accesses.append(Instructions.begin(), Instructions.end());
636 
637  LLVM_DEBUG(dbgs() << "Backward dependences:\n");
638  for (auto &Dep : Dependences)
639  if (Dep.isPossiblyBackward()) {
640  // Note that the designations source and destination follow the program
641  // order, i.e. source is always first. (The direction is given by the
642  // DepType.)
643  ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
644  --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
645 
646  LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
647  }
648  }
649 
650 private:
651  AccessesType Accesses;
652 };
653 
654 /// The actual class performing the per-loop work.
655 class LoopDistributeForLoop {
656 public:
657  LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
659  : L(L), F(F), LI(LI), DT(DT), SE(SE), ORE(ORE) {
660  setForced();
661  }
662 
663  /// Try to distribute an inner-most loop.
664  bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
665  assert(L->isInnermost() && "Only process inner loops.");
666 
667  LLVM_DEBUG(dbgs() << "\nLDist: In \""
668  << L->getHeader()->getParent()->getName()
669  << "\" checking " << *L << "\n");
670 
671  // Having a single exit block implies there's also one exiting block.
672  if (!L->getExitBlock())
673  return fail("MultipleExitBlocks", "multiple exit blocks");
674  if (!L->isLoopSimplifyForm())
675  return fail("NotLoopSimplifyForm",
676  "loop is not in loop-simplify form");
677  if (!L->isRotatedForm())
678  return fail("NotBottomTested", "loop is not bottom tested");
679 
680  BasicBlock *PH = L->getLoopPreheader();
681 
682  LAI = &GetLAA(*L);
683 
684  // Currently, we only distribute to isolate the part of the loop with
685  // dependence cycles to enable partial vectorization.
686  if (LAI->canVectorizeMemory())
687  return fail("MemOpsCanBeVectorized",
688  "memory operations are safe for vectorization");
689 
690  auto *Dependences = LAI->getDepChecker().getDependences();
691  if (!Dependences || Dependences->empty())
692  return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
693 
694  InstPartitionContainer Partitions(L, LI, DT);
695 
696  // First, go through each memory operation and assign them to consecutive
697  // partitions (the order of partitions follows program order). Put those
698  // with unsafe dependences into "cyclic" partition otherwise put each store
699  // in its own "non-cyclic" partition (we'll merge these later).
700  //
701  // Note that a memory operation (e.g. Load2 below) at a program point that
702  // has an unsafe dependence (Store3->Load1) spanning over it must be
703  // included in the same cyclic partition as the dependent operations. This
704  // is to preserve the original program order after distribution. E.g.:
705  //
706  // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
707  // Load1 -. 1 0->1
708  // Load2 | /Unsafe/ 0 1
709  // Store3 -' -1 1->0
710  // Load4 0 0
711  //
712  // NumUnsafeDependencesActive > 0 indicates this situation and in this case
713  // we just keep assigning to the same cyclic partition until
714  // NumUnsafeDependencesActive reaches 0.
715  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
716  MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
717  *Dependences);
718 
719  int NumUnsafeDependencesActive = 0;
720  for (auto &InstDep : MID) {
721  Instruction *I = InstDep.Inst;
722  // We update NumUnsafeDependencesActive post-instruction, catch the
723  // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
724  if (NumUnsafeDependencesActive ||
725  InstDep.NumUnsafeDependencesStartOrEnd > 0)
726  Partitions.addToCyclicPartition(I);
727  else
728  Partitions.addToNewNonCyclicPartition(I);
729  NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
730  assert(NumUnsafeDependencesActive >= 0 &&
731  "Negative number of dependences active");
732  }
733 
734  // Add partitions for values used outside. These partitions can be out of
735  // order from the original program order. This is OK because if the
736  // partition uses a load we will merge this partition with the original
737  // partition of the load that we set up in the previous loop (see
738  // mergeToAvoidDuplicatedLoads).
739  auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
740  for (auto *Inst : DefsUsedOutside)
741  Partitions.addToNewNonCyclicPartition(Inst);
742 
743  LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
744  if (Partitions.getSize() < 2)
745  return fail("CantIsolateUnsafeDeps",
746  "cannot isolate unsafe dependencies");
747 
748  // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
749  // should be able to vectorize these together.
750  Partitions.mergeBeforePopulating();
751  LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
752  if (Partitions.getSize() < 2)
753  return fail("CantIsolateUnsafeDeps",
754  "cannot isolate unsafe dependencies");
755 
756  // Now, populate the partitions with non-memory operations.
757  Partitions.populateUsedSet();
758  LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
759 
760  // In order to preserve original lexical order for loads, keep them in the
761  // partition that we set up in the MemoryInstructionDependences loop.
762  if (Partitions.mergeToAvoidDuplicatedLoads()) {
763  LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
764  << Partitions);
765  if (Partitions.getSize() < 2)
766  return fail("CantIsolateUnsafeDeps",
767  "cannot isolate unsafe dependencies");
768  }
769 
770  // Don't distribute the loop if we need too many SCEV run-time checks, or
771  // any if it's illegal.
772  const SCEVPredicate &Pred = LAI->getPSE().getPredicate();
773  if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
774  return fail("RuntimeCheckWithConvergent",
775  "may not insert runtime check with convergent operation");
776  }
777 
778  if (Pred.getComplexity() > (IsForced.getValueOr(false)
781  return fail("TooManySCEVRuntimeChecks",
782  "too many SCEV run-time checks needed.\n");
783 
784  if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
785  return fail("HeuristicDisabled", "distribution heuristic disabled");
786 
787  LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
788  // We're done forming the partitions set up the reverse mapping from
789  // instructions to partitions.
790  Partitions.setupPartitionIdOnInstructions();
791 
792  // If we need run-time checks, version the loop now.
793  auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
794  const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
795  const auto &AllChecks = RtPtrChecking->getChecks();
796  auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
797  RtPtrChecking);
798 
799  if (LAI->hasConvergentOp() && !Checks.empty()) {
800  return fail("RuntimeCheckWithConvergent",
801  "may not insert runtime check with convergent operation");
802  }
803 
804  // To keep things simple have an empty preheader before we version or clone
805  // the loop. (Also split if this has no predecessor, i.e. entry, because we
806  // rely on PH having a predecessor.)
807  if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
808  SplitBlock(PH, PH->getTerminator(), DT, LI);
809 
810  if (!Pred.isAlwaysTrue() || !Checks.empty()) {
811  assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
812 
813  MDNode *OrigLoopID = L->getLoopID();
814 
815  LLVM_DEBUG(dbgs() << "\nPointers:\n");
817  LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
818  LVer.versionLoop(DefsUsedOutside);
819  LVer.annotateLoopWithNoAlias();
820 
821  // The unversioned loop will not be changed, so we inherit all attributes
822  // from the original loop, but remove the loop distribution metadata to
823  // avoid to distribute it again.
824  MDNode *UnversionedLoopID =
825  makeFollowupLoopID(OrigLoopID,
828  "llvm.loop.distribute.", true)
829  .getValue();
830  LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
831  }
832 
833  // Create identical copies of the original loop for each partition and hook
834  // them up sequentially.
835  Partitions.cloneLoops();
836 
837  // Now, we remove the instruction from each loop that don't belong to that
838  // partition.
839  Partitions.removeUnusedInsts();
840  LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
841  LLVM_DEBUG(Partitions.printBlocks());
842 
843  if (LDistVerify) {
844  LI->verify(*DT);
846  }
847 
848  ++NumLoopsDistributed;
849  // Report the success.
850  ORE->emit([&]() {
851  return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
852  L->getHeader())
853  << "distributed loop";
854  });
855  return true;
856  }
857 
858  /// Provide diagnostics then \return with false.
859  bool fail(StringRef RemarkName, StringRef Message) {
860  LLVMContext &Ctx = F->getContext();
861  bool Forced = isForced().getValueOr(false);
862 
863  LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
864 
865  // With Rpass-missed report that distribution failed.
866  ORE->emit([&]() {
867  return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
868  L->getStartLoc(), L->getHeader())
869  << "loop not distributed: use -Rpass-analysis=loop-distribute for "
870  "more "
871  "info";
872  });
873 
874  // With Rpass-analysis report why. This is on by default if distribution
875  // was requested explicitly.
876  ORE->emit(OptimizationRemarkAnalysis(
878  RemarkName, L->getStartLoc(), L->getHeader())
879  << "loop not distributed: " << Message);
880 
881  // Also issue a warning if distribution was requested explicitly but it
882  // failed.
883  if (Forced)
885  *F, L->getStartLoc(), "loop not distributed: failed "
886  "explicitly specified loop distribution"));
887 
888  return false;
889  }
890 
891  /// Return if distribution forced to be enabled/disabled for the loop.
892  ///
893  /// If the optional has a value, it indicates whether distribution was forced
894  /// to be enabled (true) or disabled (false). If the optional has no value
895  /// distribution was not forced either way.
896  const Optional<bool> &isForced() const { return IsForced; }
897 
898 private:
899  /// Filter out checks between pointers from the same partition.
900  ///
901  /// \p PtrToPartition contains the partition number for pointers. Partition
902  /// number -1 means that the pointer is used in multiple partitions. In this
903  /// case we can't safely omit the check.
904  SmallVector<RuntimePointerCheck, 4> includeOnlyCrossPartitionChecks(
905  const SmallVectorImpl<RuntimePointerCheck> &AllChecks,
906  const SmallVectorImpl<int> &PtrToPartition,
907  const RuntimePointerChecking *RtPtrChecking) {
909 
910  copy_if(AllChecks, std::back_inserter(Checks),
911  [&](const RuntimePointerCheck &Check) {
912  for (unsigned PtrIdx1 : Check.first->Members)
913  for (unsigned PtrIdx2 : Check.second->Members)
914  // Only include this check if there is a pair of pointers
915  // that require checking and the pointers fall into
916  // separate partitions.
917  //
918  // (Note that we already know at this point that the two
919  // pointer groups need checking but it doesn't follow
920  // that each pair of pointers within the two groups need
921  // checking as well.
922  //
923  // In other words we don't want to include a check just
924  // because there is a pair of pointers between the two
925  // pointer groups that require checks and a different
926  // pair whose pointers fall into different partitions.)
927  if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
929  PtrToPartition, PtrIdx1, PtrIdx2))
930  return true;
931  return false;
932  });
933 
934  return Checks;
935  }
936 
937  /// Check whether the loop metadata is forcing distribution to be
938  /// enabled/disabled.
939  void setForced() {
941  findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
942  if (!Value)
943  return;
944 
945  const MDOperand *Op = *Value;
946  assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
947  IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
948  }
949 
950  Loop *L;
951  Function *F;
952 
953  // Analyses used.
954  LoopInfo *LI;
955  const LoopAccessInfo *LAI = nullptr;
956  DominatorTree *DT;
957  ScalarEvolution *SE;
959 
960  /// Indicates whether distribution is forced to be enabled/disabled for
961  /// the loop.
962  ///
963  /// If the optional has a value, it indicates whether distribution was forced
964  /// to be enabled (true) or disabled (false). If the optional has no value
965  /// distribution was not forced either way.
966  Optional<bool> IsForced;
967 };
968 
969 } // end anonymous namespace
970 
971 /// Shared implementation between new and old PMs.
972 static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
974  std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
975  // Build up a worklist of inner-loops to vectorize. This is necessary as the
976  // act of distributing a loop creates new loops and can invalidate iterators
977  // across the loops.
978  SmallVector<Loop *, 8> Worklist;
979 
980  for (Loop *TopLevelLoop : *LI)
981  for (Loop *L : depth_first(TopLevelLoop))
982  // We only handle inner-most loops.
983  if (L->isInnermost())
984  Worklist.push_back(L);
985 
986  // Now walk the identified inner loops.
987  bool Changed = false;
988  for (Loop *L : Worklist) {
989  LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
990 
991  // If distribution was forced for the specific loop to be
992  // enabled/disabled, follow that. Otherwise use the global flag.
993  if (LDL.isForced().getValueOr(EnableLoopDistribute))
994  Changed |= LDL.processLoop(GetLAA);
995  }
996 
997  // Process each loop nest in the function.
998  return Changed;
999 }
1000 
1001 namespace {
1002 
1003 /// The pass class.
1004 class LoopDistributeLegacy : public FunctionPass {
1005 public:
1006  static char ID;
1007 
1008  LoopDistributeLegacy() : FunctionPass(ID) {
1009  // The default is set by the caller.
1011  }
1012 
1013  bool runOnFunction(Function &F) override {
1014  if (skipFunction(F))
1015  return false;
1016 
1017  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1018  auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
1019  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1020  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1021  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1022  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
1023  [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
1024 
1025  return runImpl(F, LI, DT, SE, ORE, GetLAA);
1026  }
1027 
1028  void getAnalysisUsage(AnalysisUsage &AU) const override {
1037  }
1038 };
1039 
1040 } // end anonymous namespace
1041 
1044  auto &LI = AM.getResult<LoopAnalysis>(F);
1045  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1046  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1048 
1049  // We don't directly need these analyses but they're required for loop
1050  // analyses so provide them below.
1051  auto &AA = AM.getResult<AAManager>(F);
1052  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1053  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1054  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1055 
1056  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
1057  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
1058  [&](Loop &L) -> const LoopAccessInfo & {
1059  LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
1060  TLI, TTI, nullptr, nullptr, nullptr};
1061  return LAM.getResult<LoopAccessAnalysis>(L, AR);
1062  };
1063 
1064  bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA);
1065  if (!Changed)
1066  return PreservedAnalyses::all();
1067  PreservedAnalyses PA;
1068  PA.preserve<LoopAnalysis>();
1070  return PA;
1071 }
1072 
1074 
1075 static const char ldist_name[] = "Loop Distribution";
1076 
1077 INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
1078  false)
1084 INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
1085 
1086 FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
AssumptionCache.h
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1299
llvm::MemoryDepChecker
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
Definition: LoopAccessAnalysis.h:86
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2461
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:735
DistributeSCEVCheckThreshold
static cl::opt< unsigned > DistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution"))
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2115
llvm::LoopAccessLegacyAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:770
llvm::minidump::StreamType::Unused
@ Unused
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::sys::path::const_iterator::end
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::RuntimePointerCheck
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
Definition: LoopAccessAnalysis.h:369
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:81
Optional.h
llvm::PredicatedScalarEvolution::getPredicate
const SCEVPredicate & getPredicate() const
Definition: ScalarEvolution.cpp:14175
ValueMapper.h
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:189
Metadata.h
llvm::cloneLoopWithPreheader
Loop * cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl< BasicBlock * > &Blocks)
Clones a loop OrigLoop.
Definition: CloneFunction.cpp:896
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
Scalar.h
llvm::hasDisableAllTransformsHint
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:347
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::MipsISD::LDL
@ LDL
Definition: MipsISelLowering.h:252
llvm::SmallVector< Instruction *, 8 >
llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:689
Statistic.h
llvm::EquivalenceClasses
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Definition: EquivalenceClasses.h:60
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
LoopAccessAnalysis.h
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:630
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::makeFollowupLoopID
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:266
llvm::Dependence
Dependence - This class represents a dependence between two memory memory references in a function.
Definition: DependenceAnalysis.h:71
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
GlobalsModRef.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::LoopAccessAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:812
ScalarEvolution.h
llvm::findDefsUsedOutsideOfLoop
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:126
DenseMap.h
Instructions
Code Generation Notes for reduce the size of the ISel and reduce repetition in the implementation In a small number of this can cause even when no optimisation has taken place Instructions
Definition: MSA.txt:11
llvm::RuntimePointerChecking::needsChecking
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
Definition: LoopAccessAnalysis.cpp:344
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
PragmaDistributeSCEVCheckThreshold
static cl::opt< unsigned > PragmaDistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold-with-pragma", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution for loop marked with #pragma loop distribute(enable)"))
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1271
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::Optional
Definition: APInt.h:33
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
LLVMLoopDistributeFollowupFallback
static const char *const LLVMLoopDistributeFollowupFallback
Definition: LoopDistribute.cpp:87
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:261
STLExtras.h
LLVMLoopDistributeFollowupAll
static const char *const LLVMLoopDistributeFollowupAll
Definition: LoopDistribute.cpp:81
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
llvm::initializeLoopDistributeLegacyPass
void initializeLoopDistributeLegacyPass(PassRegistry &)
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::SCEVPredicate
This class represents an assumption made using SCEV expressions which can be checked at run-time.
Definition: ScalarEvolution.h:215
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
LoopAnalysisManager.h
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:283
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
CommandLine.h
ldist_name
static const char ldist_name[]
Definition: LoopDistribute.cpp:1075
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
EnableLoopDistribute
static cl::opt< bool > EnableLoopDistribute("enable-loop-distribute", cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass"), cl::init(false))
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2317
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Twine.h
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:297
llvm::remapInstructionsInBlocks
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
Definition: CloneFunction.cpp:882
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
Check
#define Check(C,...)
Definition: Lint.cpp:170
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:141
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Instruction
Definition: Instruction.h:42
runImpl
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, std::function< const LoopAccessInfo &(Loop &)> &GetLAA)
Shared implementation between new and old PMs.
Definition: LoopDistribute.cpp:972
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1769
LoopUtils.h
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2145
SmallPtrSet.h
llvm::createLoopDistributePass
FunctionPass * createLoopDistributePass()
llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:655
llvm::MemoryDepChecker::getDependences
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Definition: LoopAccessAnalysis.h:220
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::LoopAccessInfo::hasConvergentOp
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
Definition: LoopAccessAnalysis.h:570
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
DistributeNonIfConvertible
static cl::opt< bool > DistributeNonIfConvertible("loop-distribute-non-if-convertible", cl::Hidden, cl::desc("Whether to distribute into a loop that may not be " "if-convertible by the loop vectorizer"), cl::init(false))
LDIST_NAME
#define LDIST_NAME
Definition: LoopDistribute.cpp:76
llvm::findStringMetadataForLoop
Optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
Definition: LoopInfo.cpp:1051
llvm::Loop::isLoopSimplifyForm
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Definition: LoopInfo.cpp:475
BasicBlock.h
llvm::cl::opt< bool >
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::RuntimePointerChecking
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Definition: LoopAccessAnalysis.h:383
llvm::MemoryDepChecker::Dependence
Dependece between memory access instructions.
Definition: LoopAccessAnalysis.h:107
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
llvm::LoopVersioning
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Definition: LoopVersioning.h:41
llvm::ARM_AM::add
@ add
Definition: ARMAddressingModes.h:39
llvm::LoopAccessInfo::getInstructionsForAccess
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
Definition: LoopAccessAnalysis.h:604
llvm::RuntimePointerChecking::getChecks
const SmallVectorImpl< RuntimePointerCheck > & getChecks() const
Returns the checks that generateChecks created.
Definition: LoopAccessAnalysis.h:443
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::DenseMap
Definition: DenseMap.h:716
fail
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
Definition: BPFISelLowering.cpp:38
llvm::LoopAccessInfo::getRuntimePointerChecking
const RuntimePointerChecking * getRuntimePointerChecking() const
Definition: LoopAccessAnalysis.h:572
I
#define I(x, y, z)
Definition: MD5.cpp:58
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:558
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
llvm::sys::path::const_iterator::begin
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:226
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MemoryDepChecker::getMemoryInstructions
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
Definition: LoopAccessAnalysis.h:228
llvm::LoopDistributePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopDistribute.cpp:1042
llvm::Optional::getValue
constexpr const T & getValue() const &
Definition: Optional.h:279
llvm::LoopAccessInfo::canVectorizeMemory
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
Definition: LoopAccessAnalysis.h:565
llvm::SCEVPredicate::isAlwaysTrue
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
iterator_range.h
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:162
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::MDNode
Metadata node.
Definition: Metadata.h:937
llvm::LoopAccessInfo::getDepChecker
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
Definition: LoopAccessAnalysis.h:600
LLVMLoopDistributeFollowupSequential
static const char *const LLVMLoopDistributeFollowupSequential
Definition: LoopDistribute.cpp:85
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::SCEVPredicate::getComplexity
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
Definition: ScalarEvolution.h:238
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:781
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::ValueMap< const Value *, WeakTrackingVH >
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
llvm::User::replaceUsesOfWith
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
LAM
LoopAnalysisManager LAM
Definition: PassBuilderBindings.cpp:58
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:230
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::RuntimePointerChecking::Pointers
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
Definition: LoopAccessAnalysis.h:479
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::RuntimePointerChecking::arePointersInSamePartition
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Definition: LoopAccessAnalysis.cpp:530
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:243
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:268
llvm::copy_if
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1653
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:524
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:341
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
LDistVerify
static cl::opt< bool > LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification " "after Loop Distribution"), cl::init(false))
llvm::RuntimePointerChecking::printChecks
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Definition: LoopAccessAnalysis.cpp:556
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:500
Casting.h
DiagnosticInfo.h
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
PassManager.h
EquivalenceClasses.h
llvm::DiagnosticInfoOptimizationFailure
Diagnostic information for optimization failures.
Definition: DiagnosticInfo.h:977
llvm::OptimizationRemarkAnalysis::AlwaysPrint
static const char * AlwaysPrint
Definition: DiagnosticInfo.h:821
LoopVersioning.h
LoopDistribute.h
AA
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) FunctionPass *llvm
Definition: LoopDistribute.cpp:1077
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
Instructions.h
SmallVector.h
Dominators.h
LLVMLoopDistributeFollowupCoincident
static const char *const LLVMLoopDistributeFollowupCoincident
Definition: LoopDistribute.cpp:83
N
#define N
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:148
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
TargetTransformInfo.h
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::SmallVectorImpl< Instruction * >
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:937
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
llvm::Loop::isRotatedForm
bool isRotatedForm() const
Return true if the loop is in rotated form.
Definition: LoopInfo.h:791
llvm::cl::desc
Definition: CommandLine.h:405
llvm::LoopAccessInfo::getPSE
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
Definition: LoopAccessAnalysis.h:635
raw_ostream.h
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:837
Value.h
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:443
llvm::MDOperand
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:773
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1246
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1225
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37