LLVM 17.0.0git
LoopUtils.cpp
Go to the documentation of this file.
1//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines common loop utility functions.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/ScopeExit.h"
17#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DIBuilder.h"
34#include "llvm/IR/Dominators.h"
37#include "llvm/IR/MDBuilder.h"
38#include "llvm/IR/Module.h"
41#include "llvm/IR/ValueHandle.h"
43#include "llvm/Pass.h"
44#include "llvm/Support/Debug.h"
48
49using namespace llvm;
50using namespace llvm::PatternMatch;
51
52#define DEBUG_TYPE "loop-utils"
53
54static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
55static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
56
58 MemorySSAUpdater *MSSAU,
59 bool PreserveLCSSA) {
60 bool Changed = false;
61
62 // We re-use a vector for the in-loop predecesosrs.
63 SmallVector<BasicBlock *, 4> InLoopPredecessors;
64
65 auto RewriteExit = [&](BasicBlock *BB) {
66 assert(InLoopPredecessors.empty() &&
67 "Must start with an empty predecessors list!");
68 auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
69
70 // See if there are any non-loop predecessors of this exit block and
71 // keep track of the in-loop predecessors.
72 bool IsDedicatedExit = true;
73 for (auto *PredBB : predecessors(BB))
74 if (L->contains(PredBB)) {
75 if (isa<IndirectBrInst>(PredBB->getTerminator()))
76 // We cannot rewrite exiting edges from an indirectbr.
77 return false;
78
79 InLoopPredecessors.push_back(PredBB);
80 } else {
81 IsDedicatedExit = false;
82 }
83
84 assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
85
86 // Nothing to do if this is already a dedicated exit.
87 if (IsDedicatedExit)
88 return false;
89
90 auto *NewExitBB = SplitBlockPredecessors(
91 BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
92
93 if (!NewExitBB)
95 dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
96 << *L << "\n");
97 else
98 LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
99 << NewExitBB->getName() << "\n");
100 return true;
101 };
102
103 // Walk the exit blocks directly rather than building up a data structure for
104 // them, but only visit each one once.
106 for (auto *BB : L->blocks())
107 for (auto *SuccBB : successors(BB)) {
108 // We're looking for exit blocks so skip in-loop successors.
109 if (L->contains(SuccBB))
110 continue;
111
112 // Visit each exit block exactly once.
113 if (!Visited.insert(SuccBB).second)
114 continue;
115
116 Changed |= RewriteExit(SuccBB);
117 }
118
119 return Changed;
120}
121
122/// Returns the instructions that use values defined in the loop.
125
126 for (auto *Block : L->getBlocks())
127 // FIXME: I believe that this could use copy_if if the Inst reference could
128 // be adapted into a pointer.
129 for (auto &Inst : *Block) {
130 auto Users = Inst.users();
131 if (any_of(Users, [&](User *U) {
132 auto *Use = cast<Instruction>(U);
133 return !L->contains(Use->getParent());
134 }))
135 UsedOutside.push_back(&Inst);
136 }
137
138 return UsedOutside;
139}
140
142 // By definition, all loop passes need the LoopInfo analysis and the
143 // Dominator tree it depends on. Because they all participate in the loop
144 // pass manager, they must also preserve these.
149
150 // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
151 // here because users shouldn't directly get them from this header.
152 extern char &LoopSimplifyID;
153 extern char &LCSSAID;
158 // This is used in the LPPassManager to perform LCSSA verification on passes
159 // which preserve lcssa form
162
163 // Loop passes are designed to run inside of a loop pass manager which means
164 // that any function analyses they require must be required by the first loop
165 // pass in the manager (so that it is computed before the loop pass manager
166 // runs) and preserved by all loop pasess in the manager. To make this
167 // reasonably robust, the set needed for most loop passes is maintained here.
168 // If your loop pass requires an analysis not listed here, you will need to
169 // carefully audit the loop pass manager nesting structure that results.
177 // FIXME: When all loop passes preserve MemorySSA, it can be required and
178 // preserved here instead of the individual handling in each pass.
179}
180
181/// Manually defined generic "LoopPass" dependency initialization. This is used
182/// to initialize the exact set of passes from above in \c
183/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
184/// with:
185///
186/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
187///
188/// As-if "LoopPass" were a pass.
192 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
193 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
200}
201
202/// Create MDNode for input string.
203static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {
204 LLVMContext &Context = TheLoop->getHeader()->getContext();
205 Metadata *MDs[] = {
208 return MDNode::get(Context, MDs);
209}
210
211/// Set input string into loop metadata by keeping other values intact.
212/// If the string is already in loop metadata update value if it is
213/// different.
214void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
215 unsigned V) {
217 // If the loop already has metadata, retain it.
218 MDNode *LoopID = TheLoop->getLoopID();
219 if (LoopID) {
220 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
221 MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
222 // If it is of form key = value, try to parse it.
223 if (Node->getNumOperands() == 2) {
224 MDString *S = dyn_cast<MDString>(Node->getOperand(0));
225 if (S && S->getString().equals(StringMD)) {
226 ConstantInt *IntMD =
227 mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));
228 if (IntMD && IntMD->getSExtValue() == V)
229 // It is already in place. Do nothing.
230 return;
231 // We need to update the value, so just skip it here and it will
232 // be added after copying other existed nodes.
233 continue;
234 }
235 }
236 MDs.push_back(Node);
237 }
238 }
239 // Add new metadata.
240 MDs.push_back(createStringMetadata(TheLoop, StringMD, V));
241 // Replace current metadata node with new one.
242 LLVMContext &Context = TheLoop->getHeader()->getContext();
243 MDNode *NewLoopID = MDNode::get(Context, MDs);
244 // Set operand 0 to refer to the loop id itself.
245 NewLoopID->replaceOperandWith(0, NewLoopID);
246 TheLoop->setLoopID(NewLoopID);
247}
248
249std::optional<ElementCount>
251 std::optional<int> Width =
252 getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
253
254 if (Width) {
255 std::optional<int> IsScalable = getOptionalIntLoopAttribute(
256 TheLoop, "llvm.loop.vectorize.scalable.enable");
257 return ElementCount::get(*Width, IsScalable.value_or(false));
258 }
259
260 return std::nullopt;
261}
262
263std::optional<MDNode *> llvm::makeFollowupLoopID(
264 MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
265 const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
266 if (!OrigLoopID) {
267 if (AlwaysNew)
268 return nullptr;
269 return std::nullopt;
270 }
271
272 assert(OrigLoopID->getOperand(0) == OrigLoopID);
273
274 bool InheritAllAttrs = !InheritOptionsExceptPrefix;
275 bool InheritSomeAttrs =
276 InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
278 MDs.push_back(nullptr);
279
280 bool Changed = false;
281 if (InheritAllAttrs || InheritSomeAttrs) {
282 for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {
283 MDNode *Op = cast<MDNode>(Existing.get());
284
285 auto InheritThisAttribute = [InheritSomeAttrs,
286 InheritOptionsExceptPrefix](MDNode *Op) {
287 if (!InheritSomeAttrs)
288 return false;
289
290 // Skip malformatted attribute metadata nodes.
291 if (Op->getNumOperands() == 0)
292 return true;
293 Metadata *NameMD = Op->getOperand(0).get();
294 if (!isa<MDString>(NameMD))
295 return true;
296 StringRef AttrName = cast<MDString>(NameMD)->getString();
297
298 // Do not inherit excluded attributes.
299 return !AttrName.startswith(InheritOptionsExceptPrefix);
300 };
301
302 if (InheritThisAttribute(Op))
303 MDs.push_back(Op);
304 else
305 Changed = true;
306 }
307 } else {
308 // Modified if we dropped at least one attribute.
309 Changed = OrigLoopID->getNumOperands() > 1;
310 }
311
312 bool HasAnyFollowup = false;
313 for (StringRef OptionName : FollowupOptions) {
314 MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
315 if (!FollowupNode)
316 continue;
317
318 HasAnyFollowup = true;
319 for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
320 MDs.push_back(Option.get());
321 Changed = true;
322 }
323 }
324
325 // Attributes of the followup loop not specified explicity, so signal to the
326 // transformation pass to add suitable attributes.
327 if (!AlwaysNew && !HasAnyFollowup)
328 return std::nullopt;
329
330 // If no attributes were added or remove, the previous loop Id can be reused.
331 if (!AlwaysNew && !Changed)
332 return OrigLoopID;
333
334 // No attributes is equivalent to having no !llvm.loop metadata at all.
335 if (MDs.size() == 1)
336 return nullptr;
337
338 // Build the new loop ID.
339 MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
340 FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
341 return FollowupLoopID;
342}
343
346}
347
350}
351
353 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
354 return TM_SuppressedByUser;
355
356 std::optional<int> Count =
357 getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
358 if (Count)
359 return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
360
361 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
362 return TM_ForcedByUser;
363
364 if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
365 return TM_ForcedByUser;
366
368 return TM_Disable;
369
370 return TM_Unspecified;
371}
372
374 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
375 return TM_SuppressedByUser;
376
377 std::optional<int> Count =
378 getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
379 if (Count)
380 return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
381
382 if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
383 return TM_ForcedByUser;
384
386 return TM_Disable;
387
388 return TM_Unspecified;
389}
390
392 std::optional<bool> Enable =
393 getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
394
395 if (Enable == false)
396 return TM_SuppressedByUser;
397
398 std::optional<ElementCount> VectorizeWidth =
400 std::optional<int> InterleaveCount =
401 getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
402
403 // 'Forcing' vector width and interleave count to one effectively disables
404 // this tranformation.
405 if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
406 InterleaveCount == 1)
407 return TM_SuppressedByUser;
408
409 if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
410 return TM_Disable;
411
412 if (Enable == true)
413 return TM_ForcedByUser;
414
415 if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
416 return TM_Disable;
417
418 if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
419 return TM_Enable;
420
422 return TM_Disable;
423
424 return TM_Unspecified;
425}
426
428 if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
429 return TM_ForcedByUser;
430
432 return TM_Disable;
433
434 return TM_Unspecified;
435}
436
438 if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
439 return TM_SuppressedByUser;
440
442 return TM_Disable;
443
444 return TM_Unspecified;
445}
446
447/// Does a BFS from a given node to all of its children inside a given loop.
448/// The returned vector of nodes includes the starting point.
452 auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
453 // Only include subregions in the top level loop.
454 BasicBlock *BB = DTN->getBlock();
455 if (CurLoop->contains(BB))
456 Worklist.push_back(DTN);
457 };
458
459 AddRegionToWorklist(N);
460
461 for (size_t I = 0; I < Worklist.size(); I++) {
462 for (DomTreeNode *Child : Worklist[I]->children())
463 AddRegionToWorklist(Child);
464 }
465
466 return Worklist;
467}
468
470 LoopInfo *LI, MemorySSA *MSSA) {
471 assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
472 auto *Preheader = L->getLoopPreheader();
473 assert(Preheader && "Preheader should exist!");
474
475 std::unique_ptr<MemorySSAUpdater> MSSAU;
476 if (MSSA)
477 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
478
479 // Now that we know the removal is safe, remove the loop by changing the
480 // branch from the preheader to go to the single exit block.
481 //
482 // Because we're deleting a large chunk of code at once, the sequence in which
483 // we remove things is very important to avoid invalidation issues.
484
485 // Tell ScalarEvolution that the loop is deleted. Do this before
486 // deleting the loop so that ScalarEvolution can look at the loop
487 // to determine what it needs to clean up.
488 if (SE) {
489 SE->forgetLoop(L);
491 }
492
493 Instruction *OldTerm = Preheader->getTerminator();
494 assert(!OldTerm->mayHaveSideEffects() &&
495 "Preheader must end with a side-effect-free terminator");
496 assert(OldTerm->getNumSuccessors() == 1 &&
497 "Preheader must have a single successor");
498 // Connect the preheader to the exit block. Keep the old edge to the header
499 // around to perform the dominator tree update in two separate steps
500 // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
501 // preheader -> header.
502 //
503 //
504 // 0. Preheader 1. Preheader 2. Preheader
505 // | | | |
506 // V | V |
507 // Header <--\ | Header <--\ | Header <--\
508 // | | | | | | | | | | |
509 // | V | | | V | | | V |
510 // | Body --/ | | Body --/ | | Body --/
511 // V V V V V
512 // Exit Exit Exit
513 //
514 // By doing this is two separate steps we can perform the dominator tree
515 // update without using the batch update API.
516 //
517 // Even when the loop is never executed, we cannot remove the edge from the
518 // source block to the exit block. Consider the case where the unexecuted loop
519 // branches back to an outer loop. If we deleted the loop and removed the edge
520 // coming to this inner loop, this will break the outer loop structure (by
521 // deleting the backedge of the outer loop). If the outer loop is indeed a
522 // non-loop, it will be deleted in a future iteration of loop deletion pass.
523 IRBuilder<> Builder(OldTerm);
524
525 auto *ExitBlock = L->getUniqueExitBlock();
526 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
527 if (ExitBlock) {
528 assert(ExitBlock && "Should have a unique exit block!");
529 assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
530
531 Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
532 // Remove the old branch. The conditional branch becomes a new terminator.
533 OldTerm->eraseFromParent();
534
535 // Rewrite phis in the exit block to get their inputs from the Preheader
536 // instead of the exiting block.
537 for (PHINode &P : ExitBlock->phis()) {
538 // Set the zero'th element of Phi to be from the preheader and remove all
539 // other incoming values. Given the loop has dedicated exits, all other
540 // incoming values must be from the exiting blocks.
541 int PredIndex = 0;
542 P.setIncomingBlock(PredIndex, Preheader);
543 // Removes all incoming values from all other exiting blocks (including
544 // duplicate values from an exiting block).
545 // Nuke all entries except the zero'th entry which is the preheader entry.
546 // NOTE! We need to remove Incoming Values in the reverse order as done
547 // below, to keep the indices valid for deletion (removeIncomingValues
548 // updates getNumIncomingValues and shifts all values down into the
549 // operand being deleted).
550 for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
551 P.removeIncomingValue(e - i, false);
552
553 assert((P.getNumIncomingValues() == 1 &&
554 P.getIncomingBlock(PredIndex) == Preheader) &&
555 "Should have exactly one value and that's from the preheader!");
556 }
557
558 if (DT) {
559 DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});
560 if (MSSA) {
561 MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},
562 *DT);
563 if (VerifyMemorySSA)
564 MSSA->verifyMemorySSA();
565 }
566 }
567
568 // Disconnect the loop body by branching directly to its exit.
569 Builder.SetInsertPoint(Preheader->getTerminator());
570 Builder.CreateBr(ExitBlock);
571 // Remove the old branch.
572 Preheader->getTerminator()->eraseFromParent();
573 } else {
574 assert(L->hasNoExitBlocks() &&
575 "Loop should have either zero or one exit blocks.");
576
577 Builder.SetInsertPoint(OldTerm);
578 Builder.CreateUnreachable();
579 Preheader->getTerminator()->eraseFromParent();
580 }
581
582 if (DT) {
583 DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});
584 if (MSSA) {
585 MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},
586 *DT);
588 L->block_end());
589 MSSAU->removeBlocks(DeadBlockSet);
590 if (VerifyMemorySSA)
591 MSSA->verifyMemorySSA();
592 }
593 }
594
595 // Use a map to unique and a vector to guarantee deterministic ordering.
598
599 if (ExitBlock) {
600 // Given LCSSA form is satisfied, we should not have users of instructions
601 // within the dead loop outside of the loop. However, LCSSA doesn't take
602 // unreachable uses into account. We handle them here.
603 // We could do it after drop all references (in this case all users in the
604 // loop will be already eliminated and we have less work to do but according
605 // to API doc of User::dropAllReferences only valid operation after dropping
606 // references, is deletion. So let's substitute all usages of
607 // instruction from the loop with poison value of corresponding type first.
608 for (auto *Block : L->blocks())
609 for (Instruction &I : *Block) {
610 auto *Poison = PoisonValue::get(I.getType());
611 for (Use &U : llvm::make_early_inc_range(I.uses())) {
612 if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
613 if (L->contains(Usr->getParent()))
614 continue;
615 // If we have a DT then we can check that uses outside a loop only in
616 // unreachable block.
617 if (DT)
619 "Unexpected user in reachable block");
620 U.set(Poison);
621 }
622 auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
623 if (!DVI)
624 continue;
625 if (!DeadDebugSet.insert(DebugVariable(DVI)).second)
626 continue;
627 DeadDebugInst.push_back(DVI);
628 }
629
630 // After the loop has been deleted all the values defined and modified
631 // inside the loop are going to be unavailable.
632 // Since debug values in the loop have been deleted, inserting an undef
633 // dbg.value truncates the range of any dbg.value before the loop where the
634 // loop used to be. This is particularly important for constant values.
635 Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
636 assert(InsertDbgValueBefore &&
637 "There should be a non-PHI instruction in exit block, else these "
638 "instructions will have no parent.");
639 for (auto *DVI : DeadDebugInst) {
640 DVI->setKillLocation();
641 DVI->moveBefore(InsertDbgValueBefore);
642 }
643 }
644
645 // Remove the block from the reference counting scheme, so that we can
646 // delete it freely later.
647 for (auto *Block : L->blocks())
648 Block->dropAllReferences();
649
650 if (MSSA && VerifyMemorySSA)
651 MSSA->verifyMemorySSA();
652
653 if (LI) {
654 // Erase the instructions and the blocks without having to worry
655 // about ordering because we already dropped the references.
656 // NOTE: This iteration is safe because erasing the block does not remove
657 // its entry from the loop's block list. We do that in the next section.
658 for (BasicBlock *BB : L->blocks())
659 BB->eraseFromParent();
660
661 // Finally, the blocks from loopinfo. This has to happen late because
662 // otherwise our loop iterators won't work.
663
665 blocks.insert(L->block_begin(), L->block_end());
666 for (BasicBlock *BB : blocks)
667 LI->removeBlock(BB);
668
669 // The last step is to update LoopInfo now that we've eliminated this loop.
670 // Note: LoopInfo::erase remove the given loop and relink its subloops with
671 // its parent. While removeLoop/removeChildLoop remove the given loop but
672 // not relink its subloops, which is what we want.
673 if (Loop *ParentLoop = L->getParentLoop()) {
674 Loop::iterator I = find(*ParentLoop, L);
675 assert(I != ParentLoop->end() && "Couldn't find loop");
676 ParentLoop->removeChildLoop(I);
677 } else {
678 Loop::iterator I = find(*LI, L);
679 assert(I != LI->end() && "Couldn't find loop");
680 LI->removeLoop(I);
681 }
682 LI->destroy(L);
683 }
684}
685
687 LoopInfo &LI, MemorySSA *MSSA) {
688 auto *Latch = L->getLoopLatch();
689 assert(Latch && "multiple latches not yet supported");
690 auto *Header = L->getHeader();
691 Loop *OutermostLoop = L->getOutermostLoop();
692
693 SE.forgetLoop(L);
695
696 std::unique_ptr<MemorySSAUpdater> MSSAU;
697 if (MSSA)
698 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
699
700 // Update the CFG and domtree. We chose to special case a couple of
701 // of common cases for code quality and test readability reasons.
702 [&]() -> void {
703 if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
704 if (!BI->isConditional()) {
705 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
706 (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,
707 MSSAU.get());
708 return;
709 }
710
711 // Conditional latch/exit - note that latch can be shared by inner
712 // and outer loop so the other target doesn't need to an exit
713 if (L->isLoopExiting(Latch)) {
714 // TODO: Generalize ConstantFoldTerminator so that it can be used
715 // here without invalidating LCSSA or MemorySSA. (Tricky case for
716 // LCSSA: header is an exit block of a preceeding sibling loop w/o
717 // dedicated exits.)
718 const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
719 BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);
720
721 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
722 Header->removePredecessor(Latch, true);
723
725 auto *NewBI = Builder.CreateBr(ExitBB);
726 // Transfer the metadata to the new branch instruction (minus the
727 // loop info since this is no longer a loop)
728 NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg,
729 LLVMContext::MD_annotation});
730
731 BI->eraseFromParent();
732 DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});
733 if (MSSA)
734 MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
735 return;
736 }
737 }
738
739 // General case. By splitting the backedge, and then explicitly making it
740 // unreachable we gracefully handle corner cases such as switch and invoke
741 // termiantors.
742 auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
743
744 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
745 (void)changeToUnreachable(BackedgeBB->getTerminator(),
746 /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
747 }();
748
749 // Erase (and destroy) this loop instance. Handles relinking sub-loops
750 // and blocks within the loop as needed.
751 LI.erase(L);
752
753 // If the loop we broke had a parent, then changeToUnreachable might have
754 // caused a block to be removed from the parent loop (see loop_nest_lcssa
755 // test case in zero-btc.ll for an example), thus changing the parent's
756 // exit blocks. If that happened, we need to rebuild LCSSA on the outermost
757 // loop which might have a had a block removed.
758 if (OutermostLoop != L)
759 formLCSSARecursively(*OutermostLoop, DT, &LI, &SE);
760}
761
762
763/// Checks if \p L has an exiting latch branch. There may also be other
764/// exiting blocks. Returns branch instruction terminating the loop
765/// latch if above check is successful, nullptr otherwise.
767 BasicBlock *Latch = L->getLoopLatch();
768 if (!Latch)
769 return nullptr;
770
771 BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
772 if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
773 return nullptr;
774
775 assert((LatchBR->getSuccessor(0) == L->getHeader() ||
776 LatchBR->getSuccessor(1) == L->getHeader()) &&
777 "At least one edge out of the latch must go to the header");
778
779 return LatchBR;
780}
781
782/// Return the estimated trip count for any exiting branch which dominates
783/// the loop latch.
784static std::optional<uint64_t> getEstimatedTripCount(BranchInst *ExitingBranch,
785 Loop *L,
786 uint64_t &OrigExitWeight) {
787 // To estimate the number of times the loop body was executed, we want to
788 // know the number of times the backedge was taken, vs. the number of times
789 // we exited the loop.
790 uint64_t LoopWeight, ExitWeight;
791 if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))
792 return std::nullopt;
793
794 if (L->contains(ExitingBranch->getSuccessor(1)))
795 std::swap(LoopWeight, ExitWeight);
796
797 if (!ExitWeight)
798 // Don't have a way to return predicated infinite
799 return std::nullopt;
800
801 OrigExitWeight = ExitWeight;
802
803 // Estimated exit count is a ratio of the loop weight by the weight of the
804 // edge exiting the loop, rounded to nearest.
805 uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight);
806 // Estimated trip count is one plus estimated exit count.
807 return ExitCount + 1;
808}
809
810std::optional<unsigned>
812 unsigned *EstimatedLoopInvocationWeight) {
813 // Currently we take the estimate exit count only from the loop latch,
814 // ignoring other exiting blocks. This can overestimate the trip count
815 // if we exit through another exit, but can never underestimate it.
816 // TODO: incorporate information from other exits
817 if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {
818 uint64_t ExitWeight;
819 if (std::optional<uint64_t> EstTripCount =
820 getEstimatedTripCount(LatchBranch, L, ExitWeight)) {
821 if (EstimatedLoopInvocationWeight)
822 *EstimatedLoopInvocationWeight = ExitWeight;
823 return *EstTripCount;
824 }
825 }
826 return std::nullopt;
827}
828
829bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
830 unsigned EstimatedloopInvocationWeight) {
831 // At the moment, we currently support changing the estimate trip count of
832 // the latch branch only. We could extend this API to manipulate estimated
833 // trip counts for any exit.
835 if (!LatchBranch)
836 return false;
837
838 // Calculate taken and exit weights.
839 unsigned LatchExitWeight = 0;
840 unsigned BackedgeTakenWeight = 0;
841
842 if (EstimatedTripCount > 0) {
843 LatchExitWeight = EstimatedloopInvocationWeight;
844 BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;
845 }
846
847 // Make a swap if back edge is taken when condition is "false".
848 if (LatchBranch->getSuccessor(0) != L->getHeader())
849 std::swap(BackedgeTakenWeight, LatchExitWeight);
850
851 MDBuilder MDB(LatchBranch->getContext());
852
853 // Set/Update profile metadata.
854 LatchBranch->setMetadata(
855 LLVMContext::MD_prof,
856 MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));
857
858 return true;
859}
860
862 ScalarEvolution &SE) {
863 Loop *OuterL = InnerLoop->getParentLoop();
864 if (!OuterL)
865 return true;
866
867 // Get the backedge taken count for the inner loop
868 BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
869 const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
870 if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
871 !InnerLoopBECountSC->getType()->isIntegerTy())
872 return false;
873
874 // Get whether count is invariant to the outer loop
876 SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
878 return false;
879
880 return true;
881}
882
884 switch (RK) {
885 default:
886 llvm_unreachable("Unknown min/max recurrence kind");
887 case RecurKind::UMin:
888 return CmpInst::ICMP_ULT;
889 case RecurKind::UMax:
890 return CmpInst::ICMP_UGT;
891 case RecurKind::SMin:
892 return CmpInst::ICMP_SLT;
893 case RecurKind::SMax:
894 return CmpInst::ICMP_SGT;
895 case RecurKind::FMin:
896 return CmpInst::FCMP_OLT;
897 case RecurKind::FMax:
898 return CmpInst::FCMP_OGT;
899 }
900}
901
903 RecurKind RK, Value *Left, Value *Right) {
904 if (auto VTy = dyn_cast<VectorType>(Left->getType()))
905 StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
906 Value *Cmp =
907 Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
908 return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
909}
910
912 Value *Right) {
914 Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
915 Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
916 return Select;
917}
918
919// Helper to generate an ordered reduction.
921 unsigned Op, RecurKind RdxKind) {
922 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
923
924 // Extract and apply reduction ops in ascending order:
925 // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
926 Value *Result = Acc;
927 for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
928 Value *Ext =
929 Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));
930
931 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
932 Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
933 "bin.rdx");
934 } else {
936 "Invalid min/max");
937 Result = createMinMaxOp(Builder, RdxKind, Result, Ext);
938 }
939 }
940
941 return Result;
942}
943
944// Helper to generate a log2 shuffle reduction.
946 unsigned Op, RecurKind RdxKind) {
947 unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
948 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
949 // and vector ops, reducing the set of values being computed by half each
950 // round.
951 assert(isPowerOf2_32(VF) &&
952 "Reduction emission only supported for pow2 vectors!");
953 // Note: fast-math-flags flags are controlled by the builder configuration
954 // and are assumed to apply to all generated arithmetic instructions. Other
955 // poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part
956 // of the builder configuration, and since they're not passed explicitly,
957 // will never be relevant here. Note that it would be generally unsound to
958 // propagate these from an intrinsic call to the expansion anyways as we/
959 // change the order of operations.
960 Value *TmpVec = Src;
961 SmallVector<int, 32> ShuffleMask(VF);
962 for (unsigned i = VF; i != 1; i >>= 1) {
963 // Move the upper half of the vector to the lower half.
964 for (unsigned j = 0; j != i / 2; ++j)
965 ShuffleMask[j] = i / 2 + j;
966
967 // Fill the rest of the mask with undef.
968 std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
969
970 Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
971
972 if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
973 TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
974 "bin.rdx");
975 } else {
977 "Invalid min/max");
978 TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
979 }
980 }
981 // The result is in the first element of the vector.
982 return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
983}
984
987 Value *Src,
988 const RecurrenceDescriptor &Desc,
989 PHINode *OrigPhi) {
991 Desc.getRecurrenceKind()) &&
992 "Unexpected reduction kind");
993 Value *InitVal = Desc.getRecurrenceStartValue();
994 Value *NewVal = nullptr;
995
996 // First use the original phi to determine the new value we're trying to
997 // select from in the loop.
998 SelectInst *SI = nullptr;
999 for (auto *U : OrigPhi->users()) {
1000 if ((SI = dyn_cast<SelectInst>(U)))
1001 break;
1002 }
1003 assert(SI && "One user of the original phi should be a select");
1004
1005 if (SI->getTrueValue() == OrigPhi)
1006 NewVal = SI->getFalseValue();
1007 else {
1008 assert(SI->getFalseValue() == OrigPhi &&
1009 "At least one input to the select should be the original Phi");
1010 NewVal = SI->getTrueValue();
1011 }
1012
1013 // Create a splat vector with the new value and compare this to the vector
1014 // we want to reduce.
1015 ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
1016 Value *Right = Builder.CreateVectorSplat(EC, InitVal);
1017 Value *Cmp =
1018 Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
1019
1020 // If any predicate is true it means that we want to select the new value.
1021 Cmp = Builder.CreateOrReduce(Cmp);
1022 return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
1023}
1024
1026 const TargetTransformInfo *TTI,
1027 Value *Src, RecurKind RdxKind) {
1028 auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
1029 switch (RdxKind) {
1030 case RecurKind::Add:
1031 return Builder.CreateAddReduce(Src);
1032 case RecurKind::Mul:
1033 return Builder.CreateMulReduce(Src);
1034 case RecurKind::And:
1035 return Builder.CreateAndReduce(Src);
1036 case RecurKind::Or:
1037 return Builder.CreateOrReduce(Src);
1038 case RecurKind::Xor:
1039 return Builder.CreateXorReduce(Src);
1040 case RecurKind::FMulAdd:
1041 case RecurKind::FAdd:
1042 return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
1043 Src);
1044 case RecurKind::FMul:
1045 return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src);
1046 case RecurKind::SMax:
1047 return Builder.CreateIntMaxReduce(Src, true);
1048 case RecurKind::SMin:
1049 return Builder.CreateIntMinReduce(Src, true);
1050 case RecurKind::UMax:
1051 return Builder.CreateIntMaxReduce(Src, false);
1052 case RecurKind::UMin:
1053 return Builder.CreateIntMinReduce(Src, false);
1054 case RecurKind::FMax:
1055 return Builder.CreateFPMaxReduce(Src);
1056 case RecurKind::FMin:
1057 return Builder.CreateFPMinReduce(Src);
1058 default:
1059 llvm_unreachable("Unhandled opcode");
1060 }
1061}
1062
1064 const TargetTransformInfo *TTI,
1065 const RecurrenceDescriptor &Desc, Value *Src,
1066 PHINode *OrigPhi) {
1067 // TODO: Support in-order reductions based on the recurrence descriptor.
1068 // All ops in the reduction inherit fast-math-flags from the recurrence
1069 // descriptor.
1071 B.setFastMathFlags(Desc.getFastMathFlags());
1072
1073 RecurKind RK = Desc.getRecurrenceKind();
1075 return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi);
1076
1077 return createSimpleTargetReduction(B, TTI, Src, RK);
1078}
1079
1081 const RecurrenceDescriptor &Desc,
1082 Value *Src, Value *Start) {
1083 assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
1084 Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
1085 "Unexpected reduction kind");
1086 assert(Src->getType()->isVectorTy() && "Expected a vector type");
1087 assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1088
1089 return B.CreateFAddReduce(Start, Src);
1090}
1091
1093 bool IncludeWrapFlags) {
1094 auto *VecOp = dyn_cast<Instruction>(I);
1095 if (!VecOp)
1096 return;
1097 auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
1098 : dyn_cast<Instruction>(OpValue);
1099 if (!Intersection)
1100 return;
1101 const unsigned Opcode = Intersection->getOpcode();
1102 VecOp->copyIRFlags(Intersection, IncludeWrapFlags);
1103 for (auto *V : VL) {
1104 auto *Instr = dyn_cast<Instruction>(V);
1105 if (!Instr)
1106 continue;
1107 if (OpValue == nullptr || Opcode == Instr->getOpcode())
1108 VecOp->andIRFlags(V);
1109 }
1110}
1111
1112bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
1113 ScalarEvolution &SE) {
1114 const SCEV *Zero = SE.getZero(S->getType());
1115 return SE.isAvailableAtLoopEntry(S, L) &&
1116 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
1117}
1118
1120 ScalarEvolution &SE) {
1121 const SCEV *Zero = SE.getZero(S->getType());
1122 return SE.isAvailableAtLoopEntry(S, L) &&
1123 SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
1124}
1125
1127 bool Signed) {
1128 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1131 auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1132 return SE.isAvailableAtLoopEntry(S, L) &&
1133 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1134 SE.getConstant(Min));
1135}
1136
1138 bool Signed) {
1139 unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
1142 auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1143 return SE.isAvailableAtLoopEntry(S, L) &&
1144 SE.isLoopEntryGuardedByCond(L, Predicate, S,
1145 SE.getConstant(Max));
1146}
1147
1148//===----------------------------------------------------------------------===//
1149// rewriteLoopExitValues - Optimize IV users outside the loop.
1150// As a side effect, reduces the amount of IV processing within the loop.
1151//===----------------------------------------------------------------------===//
1152
1153static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
1156 Visited.insert(I);
1157 WorkList.push_back(I);
1158 while (!WorkList.empty()) {
1159 const Instruction *Curr = WorkList.pop_back_val();
1160 // This use is outside the loop, nothing to do.
1161 if (!L->contains(Curr))
1162 continue;
1163 // Do we assume it is a "hard" use which will not be eliminated easily?
1164 if (Curr->mayHaveSideEffects())
1165 return true;
1166 // Otherwise, add all its users to worklist.
1167 for (const auto *U : Curr->users()) {
1168 auto *UI = cast<Instruction>(U);
1169 if (Visited.insert(UI).second)
1170 WorkList.push_back(UI);
1171 }
1172 }
1173 return false;
1174}
1175
1176// Collect information about PHI nodes which can be transformed in
1177// rewriteLoopExitValues.
1179 PHINode *PN; // For which PHI node is this replacement?
1180 unsigned Ith; // For which incoming value?
1181 const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
1182 Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
1183 bool HighCost; // Is this expansion a high-cost?
1184
1185 RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
1186 bool H)
1187 : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
1188 HighCost(H) {}
1189};
1190
1191// Check whether it is possible to delete the loop after rewriting exit
1192// value. If it is possible, ignore ReplaceExitValue and do rewriting
1193// aggressively.
1194static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
1195 BasicBlock *Preheader = L->getLoopPreheader();
1196 // If there is no preheader, the loop will not be deleted.
1197 if (!Preheader)
1198 return false;
1199
1200 // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
1201 // We obviate multiple ExitingBlocks case for simplicity.
1202 // TODO: If we see testcase with multiple ExitingBlocks can be deleted
1203 // after exit value rewriting, we can enhance the logic here.
1204 SmallVector<BasicBlock *, 4> ExitingBlocks;
1205 L->getExitingBlocks(ExitingBlocks);
1207 L->getUniqueExitBlocks(ExitBlocks);
1208 if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
1209 return false;
1210
1211 BasicBlock *ExitBlock = ExitBlocks[0];
1212 BasicBlock::iterator BI = ExitBlock->begin();
1213 while (PHINode *P = dyn_cast<PHINode>(BI)) {
1214 Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
1215
1216 // If the Incoming value of P is found in RewritePhiSet, we know it
1217 // could be rewritten to use a loop invariant value in transformation
1218 // phase later. Skip it in the loop invariant check below.
1219 bool found = false;
1220 for (const RewritePhi &Phi : RewritePhiSet) {
1221 unsigned i = Phi.Ith;
1222 if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
1223 found = true;
1224 break;
1225 }
1226 }
1227
1228 Instruction *I;
1229 if (!found && (I = dyn_cast<Instruction>(Incoming)))
1230 if (!L->hasLoopInvariantOperands(I))
1231 return false;
1232
1233 ++BI;
1234 }
1235
1236 for (auto *BB : L->blocks())
1237 if (llvm::any_of(*BB, [](Instruction &I) {
1238 return I.mayHaveSideEffects();
1239 }))
1240 return false;
1241
1242 return true;
1243}
1244
1245/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
1246/// and returns true if this Phi is an induction phi in the loop. When
1247/// isInductionPHI returns true, \p ID will be also be set by isInductionPHI.
1248static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,
1250 if (!Phi)
1251 return false;
1252 if (!L->getLoopPreheader())
1253 return false;
1254 if (Phi->getParent() != L->getHeader())
1255 return false;
1256 return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);
1257}
1258
1260 ScalarEvolution *SE,
1261 const TargetTransformInfo *TTI,
1262 SCEVExpander &Rewriter, DominatorTree *DT,
1265 // Check a pre-condition.
1266 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1267 "Indvars did not preserve LCSSA!");
1268
1269 SmallVector<BasicBlock*, 8> ExitBlocks;
1270 L->getUniqueExitBlocks(ExitBlocks);
1271
1272 SmallVector<RewritePhi, 8> RewritePhiSet;
1273 // Find all values that are computed inside the loop, but used outside of it.
1274 // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
1275 // the exit blocks of the loop to find them.
1276 for (BasicBlock *ExitBB : ExitBlocks) {
1277 // If there are no PHI nodes in this exit block, then no values defined
1278 // inside the loop are used on this path, skip it.
1279 PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
1280 if (!PN) continue;
1281
1282 unsigned NumPreds = PN->getNumIncomingValues();
1283
1284 // Iterate over all of the PHI nodes.
1285 BasicBlock::iterator BBI = ExitBB->begin();
1286 while ((PN = dyn_cast<PHINode>(BBI++))) {
1287 if (PN->use_empty())
1288 continue; // dead use, don't replace it
1289
1290 if (!SE->isSCEVable(PN->getType()))
1291 continue;
1292
1293 // Iterate over all of the values in all the PHI nodes.
1294 for (unsigned i = 0; i != NumPreds; ++i) {
1295 // If the value being merged in is not integer or is not defined
1296 // in the loop, skip it.
1297 Value *InVal = PN->getIncomingValue(i);
1298 if (!isa<Instruction>(InVal))
1299 continue;
1300
1301 // If this pred is for a subloop, not L itself, skip it.
1302 if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
1303 continue; // The Block is in a subloop, skip it.
1304
1305 // Check that InVal is defined in the loop.
1306 Instruction *Inst = cast<Instruction>(InVal);
1307 if (!L->contains(Inst))
1308 continue;
1309
1310 // Find exit values which are induction variables in the loop, and are
1311 // unused in the loop, with the only use being the exit block PhiNode,
1312 // and the induction variable update binary operator.
1313 // The exit value can be replaced with the final value when it is cheap
1314 // to do so.
1317 PHINode *IndPhi = dyn_cast<PHINode>(Inst);
1318 if (IndPhi) {
1319 if (!checkIsIndPhi(IndPhi, L, SE, ID))
1320 continue;
1321 // This is an induction PHI. Check that the only users are PHI
1322 // nodes, and induction variable update binary operators.
1323 if (llvm::any_of(Inst->users(), [&](User *U) {
1324 if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))
1325 return true;
1326 BinaryOperator *B = dyn_cast<BinaryOperator>(U);
1327 if (B && B != ID.getInductionBinOp())
1328 return true;
1329 return false;
1330 }))
1331 continue;
1332 } else {
1333 // If it is not an induction phi, it must be an induction update
1334 // binary operator with an induction phi user.
1335 BinaryOperator *B = dyn_cast<BinaryOperator>(Inst);
1336 if (!B)
1337 continue;
1338 if (llvm::any_of(Inst->users(), [&](User *U) {
1339 PHINode *Phi = dyn_cast<PHINode>(U);
1340 if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
1341 return true;
1342 return false;
1343 }))
1344 continue;
1345 if (B != ID.getInductionBinOp())
1346 continue;
1347 }
1348 }
1349
1350 // Okay, this instruction has a user outside of the current loop
1351 // and varies predictably *inside* the loop. Evaluate the value it
1352 // contains when the loop exits, if possible. We prefer to start with
1353 // expressions which are true for all exits (so as to maximize
1354 // expression reuse by the SCEVExpander), but resort to per-exit
1355 // evaluation if that fails.
1356 const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
1357 if (isa<SCEVCouldNotCompute>(ExitValue) ||
1358 !SE->isLoopInvariant(ExitValue, L) ||
1359 !Rewriter.isSafeToExpand(ExitValue)) {
1360 // TODO: This should probably be sunk into SCEV in some way; maybe a
1361 // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
1362 // most SCEV expressions and other recurrence types (e.g. shift
1363 // recurrences). Is there existing code we can reuse?
1364 const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
1365 if (isa<SCEVCouldNotCompute>(ExitCount))
1366 continue;
1367 if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
1368 if (AddRec->getLoop() == L)
1369 ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
1370 if (isa<SCEVCouldNotCompute>(ExitValue) ||
1371 !SE->isLoopInvariant(ExitValue, L) ||
1372 !Rewriter.isSafeToExpand(ExitValue))
1373 continue;
1374 }
1375
1376 // Computing the value outside of the loop brings no benefit if it is
1377 // definitely used inside the loop in a way which can not be optimized
1378 // away. Avoid doing so unless we know we have a value which computes
1379 // the ExitValue already. TODO: This should be merged into SCEV
1380 // expander to leverage its knowledge of existing expressions.
1381 if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) &&
1382 !isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst))
1383 continue;
1384
1385 // Check if expansions of this SCEV would count as being high cost.
1386 bool HighCost = Rewriter.isHighCostExpansion(
1387 ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);
1388
1389 // Note that we must not perform expansions until after
1390 // we query *all* the costs, because if we perform temporary expansion
1391 // inbetween, one that we might not intend to keep, said expansion
1392 // *may* affect cost calculation of the the next SCEV's we'll query,
1393 // and next SCEV may errneously get smaller cost.
1394
1395 // Collect all the candidate PHINodes to be rewritten.
1396 Instruction *InsertPt =
1397 (isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ?
1398 &*Inst->getParent()->getFirstInsertionPt() : Inst;
1399 RewritePhiSet.emplace_back(PN, i, ExitValue, InsertPt, HighCost);
1400 }
1401 }
1402 }
1403
1404 // TODO: evaluate whether it is beneficial to change how we calculate
1405 // high-cost: if we have SCEV 'A' which we know we will expand, should we
1406 // calculate the cost of other SCEV's after expanding SCEV 'A', thus
1407 // potentially giving cost bonus to those other SCEV's?
1408
1409 bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
1410 int NumReplaced = 0;
1411
1412 // Transformation.
1413 for (const RewritePhi &Phi : RewritePhiSet) {
1414 PHINode *PN = Phi.PN;
1415
1416 // Only do the rewrite when the ExitValue can be expanded cheaply.
1417 // If LoopCanBeDel is true, rewrite exit value aggressively.
1420 !LoopCanBeDel && Phi.HighCost)
1421 continue;
1422
1423 Value *ExitVal = Rewriter.expandCodeFor(
1424 Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);
1425
1426 LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
1427 << '\n'
1428 << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
1429
1430#ifndef NDEBUG
1431 // If we reuse an instruction from a loop which is neither L nor one of
1432 // its containing loops, we end up breaking LCSSA form for this loop by
1433 // creating a new use of its instruction.
1434 if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
1435 if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
1436 if (EVL != L)
1437 assert(EVL->contains(L) && "LCSSA breach detected!");
1438#endif
1439
1440 NumReplaced++;
1441 Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
1442 PN->setIncomingValue(Phi.Ith, ExitVal);
1443 // It's necessary to tell ScalarEvolution about this explicitly so that
1444 // it can walk the def-use list and forget all SCEVs, as it may not be
1445 // watching the PHI itself. Once the new exit value is in place, there
1446 // may not be a def-use connection between the loop and every instruction
1447 // which got a SCEVAddRecExpr for that loop.
1448 SE->forgetValue(PN);
1449
1450 // If this instruction is dead now, delete it. Don't do it now to avoid
1451 // invalidating iterators.
1452 if (isInstructionTriviallyDead(Inst, TLI))
1453 DeadInsts.push_back(Inst);
1454
1455 // Replace PN with ExitVal if that is legal and does not break LCSSA.
1456 if (PN->getNumIncomingValues() == 1 &&
1457 LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
1458 PN->replaceAllUsesWith(ExitVal);
1459 PN->eraseFromParent();
1460 }
1461 }
1462
1463 // The insertion point instruction may have been deleted; clear it out
1464 // so that the rewriter doesn't trip over it later.
1465 Rewriter.clearInsertPoint();
1466 return NumReplaced;
1467}
1468
1469/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
1470/// \p OrigLoop.
1471void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
1472 Loop *RemainderLoop, uint64_t UF) {
1473 assert(UF > 0 && "Zero unrolled factor is not supported");
1474 assert(UnrolledLoop != RemainderLoop &&
1475 "Unrolled and Remainder loops are expected to distinct");
1476
1477 // Get number of iterations in the original scalar loop.
1478 unsigned OrigLoopInvocationWeight = 0;
1479 std::optional<unsigned> OrigAverageTripCount =
1480 getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
1481 if (!OrigAverageTripCount)
1482 return;
1483
1484 // Calculate number of iterations in unrolled loop.
1485 unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
1486 // Calculate number of iterations for remainder loop.
1487 unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
1488
1489 setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount,
1490 OrigLoopInvocationWeight);
1491 setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount,
1492 OrigLoopInvocationWeight);
1493}
1494
1495/// Utility that implements appending of loops onto a worklist.
1496/// Loops are added in preorder (analogous for reverse postorder for trees),
1497/// and the worklist is processed LIFO.
1498template <typename RangeT>
1500 RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) {
1501 // We use an internal worklist to build up the preorder traversal without
1502 // recursion.
1503 SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1504
1505 // We walk the initial sequence of loops in reverse because we generally want
1506 // to visit defs before uses and the worklist is LIFO.
1507 for (Loop *RootL : Loops) {
1508 assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1509 assert(PreOrderWorklist.empty() &&
1510 "Must start with an empty preorder walk worklist.");
1511 PreOrderWorklist.push_back(RootL);
1512 do {
1513 Loop *L = PreOrderWorklist.pop_back_val();
1514 PreOrderWorklist.append(L->begin(), L->end());
1515 PreOrderLoops.push_back(L);
1516 } while (!PreOrderWorklist.empty());
1517
1518 Worklist.insert(std::move(PreOrderLoops));
1519 PreOrderLoops.clear();
1520 }
1521}
1522
1523template <typename RangeT>
1527}
1528
1529template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(
1531
1532template void
1533llvm::appendLoopsToWorklist<Loop &>(Loop &L,
1535
1538 appendReversedLoopsToWorklist(LI, Worklist);
1539}
1540
1542 LoopInfo *LI, LPPassManager *LPM) {
1543 Loop &New = *LI->AllocateLoop();
1544 if (PL)
1545 PL->addChildLoop(&New);
1546 else
1547 LI->addTopLevelLoop(&New);
1548
1549 if (LPM)
1550 LPM->addLoop(New);
1551
1552 // Add all of the blocks in L to the new loop.
1553 for (BasicBlock *BB : L->blocks())
1554 if (LI->getLoopFor(BB) == L)
1555 New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI);
1556
1557 // Add all of the subloops to the new loop.
1558 for (Loop *I : *L)
1559 cloneLoop(I, &New, VM, LI, LPM);
1560
1561 return &New;
1562}
1563
1564/// IR Values for the lower and upper bounds of a pointer evolution. We
1565/// need to use value-handles because SCEV expansion can invalidate previously
1566/// expanded values. Thus expansion of a pointer can invalidate the bounds for
1567/// a previous one.
1571};
1572
1573/// Expand code for the lower and upper bound of the pointer group \p CG
1574/// in \p TheLoop. \return the values for the bounds.
1576 Loop *TheLoop, Instruction *Loc,
1577 SCEVExpander &Exp) {
1578 LLVMContext &Ctx = Loc->getContext();
1579 Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace);
1580
1581 Value *Start = nullptr, *End = nullptr;
1582 LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
1583 Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
1584 End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
1585 if (CG->NeedsFreeze) {
1586 IRBuilder<> Builder(Loc);
1587 Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
1588 End = Builder.CreateFreeze(End, End->getName() + ".fr");
1589 }
1590 LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
1591 return {Start, End};
1592}
1593
1594/// Turns a collection of checks into a collection of expanded upper and
1595/// lower bounds for both pointers in the check.
1598 Instruction *Loc, SCEVExpander &Exp) {
1600
1601 // Here we're relying on the SCEV Expander's cache to only emit code for the
1602 // same bounds once.
1603 transform(PointerChecks, std::back_inserter(ChecksWithBounds),
1604 [&](const RuntimePointerCheck &Check) {
1605 PointerBounds First = expandBounds(Check.first, L, Loc, Exp),
1606 Second = expandBounds(Check.second, L, Loc, Exp);
1607 return std::make_pair(First, Second);
1608 });
1609
1610 return ChecksWithBounds;
1611}
1612
1614 Instruction *Loc, Loop *TheLoop,
1615 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
1616 SCEVExpander &Exp) {
1617 // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
1618 // TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
1619 auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);
1620
1621 LLVMContext &Ctx = Loc->getContext();
1622 IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
1623 Loc->getModule()->getDataLayout());
1624 ChkBuilder.SetInsertPoint(Loc);
1625 // Our instructions might fold to a constant.
1626 Value *MemoryRuntimeCheck = nullptr;
1627
1628 for (const auto &Check : ExpandedChecks) {
1629 const PointerBounds &A = Check.first, &B = Check.second;
1630 // Check if two pointers (A and B) conflict where conflict is computed as:
1631 // start(A) <= end(B) && start(B) <= end(A)
1632 unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
1633 unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
1634
1635 assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
1636 (AS1 == A.End->getType()->getPointerAddressSpace()) &&
1637 "Trying to bounds check pointers with different address spaces");
1638
1639 Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
1640 Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
1641
1642 Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
1643 Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
1644 Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
1645 Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
1646
1647 // [A|B].Start points to the first accessed byte under base [A|B].
1648 // [A|B].End points to the last accessed byte, plus one.
1649 // There is no conflict when the intervals are disjoint:
1650 // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
1651 //
1652 // bound0 = (B.Start < A.End)
1653 // bound1 = (A.Start < B.End)
1654 // IsConflict = bound0 & bound1
1655 Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
1656 Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
1657 Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
1658 if (MemoryRuntimeCheck) {
1659 IsConflict =
1660 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
1661 }
1662 MemoryRuntimeCheck = IsConflict;
1663 }
1664
1665 return MemoryRuntimeCheck;
1666}
1667
1669 Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
1670 function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
1671
1672 LLVMContext &Ctx = Loc->getContext();
1673 IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
1674 Loc->getModule()->getDataLayout());
1675 ChkBuilder.SetInsertPoint(Loc);
1676 // Our instructions might fold to a constant.
1677 Value *MemoryRuntimeCheck = nullptr;
1678
1679 for (const auto &C : Checks) {
1680 Type *Ty = C.SinkStart->getType();
1681 // Compute VF * IC * AccessSize.
1682 auto *VFTimesUFTimesSize =
1683 ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
1684 ConstantInt::get(Ty, IC * C.AccessSize));
1685 Value *Sink = Expander.expandCodeFor(C.SinkStart, Ty, Loc);
1686 Value *Src = Expander.expandCodeFor(C.SrcStart, Ty, Loc);
1687 if (C.NeedsFreeze) {
1688 IRBuilder<> Builder(Loc);
1689 Sink = Builder.CreateFreeze(Sink, Sink->getName() + ".fr");
1690 Src = Builder.CreateFreeze(Src, Src->getName() + ".fr");
1691 }
1692 Value *Diff = ChkBuilder.CreateSub(Sink, Src);
1693 Value *IsConflict =
1694 ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
1695
1696 if (MemoryRuntimeCheck) {
1697 IsConflict =
1698 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
1699 }
1700 MemoryRuntimeCheck = IsConflict;
1701 }
1702
1703 return MemoryRuntimeCheck;
1704}
1705
1706std::optional<IVConditionInfo>
1708 const MemorySSA &MSSA, AAResults &AA) {
1709 auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
1710 if (!TI || !TI->isConditional())
1711 return {};
1712
1713 auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
1714 // The case with the condition outside the loop should already be handled
1715 // earlier.
1716 if (!CondI || !L.contains(CondI))
1717 return {};
1718
1719 SmallVector<Instruction *> InstToDuplicate;
1720 InstToDuplicate.push_back(CondI);
1721
1722 SmallVector<Value *, 4> WorkList;
1723 WorkList.append(CondI->op_begin(), CondI->op_end());
1724
1725 SmallVector<MemoryAccess *, 4> AccessesToCheck;
1726 SmallVector<MemoryLocation, 4> AccessedLocs;
1727 while (!WorkList.empty()) {
1728 Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
1729 if (!I || !L.contains(I))
1730 continue;
1731
1732 // TODO: support additional instructions.
1733 if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
1734 return {};
1735
1736 // Do not duplicate volatile and atomic loads.
1737 if (auto *LI = dyn_cast<LoadInst>(I))
1738 if (LI->isVolatile() || LI->isAtomic())
1739 return {};
1740
1741 InstToDuplicate.push_back(I);
1742 if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
1743 if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
1744 // Queue the defining access to check for alias checks.
1745 AccessesToCheck.push_back(MemUse->getDefiningAccess());
1746 AccessedLocs.push_back(MemoryLocation::get(I));
1747 } else {
1748 // MemoryDefs may clobber the location or may be atomic memory
1749 // operations. Bail out.
1750 return {};
1751 }
1752 }
1753 WorkList.append(I->op_begin(), I->op_end());
1754 }
1755
1756 if (InstToDuplicate.empty())
1757 return {};
1758
1759 SmallVector<BasicBlock *, 4> ExitingBlocks;
1760 L.getExitingBlocks(ExitingBlocks);
1761 auto HasNoClobbersOnPath =
1762 [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
1763 MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
1764 SmallVector<MemoryAccess *, 4> AccessesToCheck)
1765 -> std::optional<IVConditionInfo> {
1767 // First, collect all blocks in the loop that are on a patch from Succ
1768 // to the header.
1770 WorkList.push_back(Succ);
1771 WorkList.push_back(Header);
1773 Seen.insert(Header);
1774 Info.PathIsNoop &=
1775 all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
1776
1777 while (!WorkList.empty()) {
1778 BasicBlock *Current = WorkList.pop_back_val();
1779 if (!L.contains(Current))
1780 continue;
1781 const auto &SeenIns = Seen.insert(Current);
1782 if (!SeenIns.second)
1783 continue;
1784
1785 Info.PathIsNoop &= all_of(
1786 *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
1787 WorkList.append(succ_begin(Current), succ_end(Current));
1788 }
1789
1790 // Require at least 2 blocks on a path through the loop. This skips
1791 // paths that directly exit the loop.
1792 if (Seen.size() < 2)
1793 return {};
1794
1795 // Next, check if there are any MemoryDefs that are on the path through
1796 // the loop (in the Seen set) and they may-alias any of the locations in
1797 // AccessedLocs. If that is the case, they may modify the condition and
1798 // partial unswitching is not possible.
1799 SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
1800 while (!AccessesToCheck.empty()) {
1801 MemoryAccess *Current = AccessesToCheck.pop_back_val();
1802 auto SeenI = SeenAccesses.insert(Current);
1803 if (!SeenI.second || !Seen.contains(Current->getBlock()))
1804 continue;
1805
1806 // Bail out if exceeded the threshold.
1807 if (SeenAccesses.size() >= MSSAThreshold)
1808 return {};
1809
1810 // MemoryUse are read-only accesses.
1811 if (isa<MemoryUse>(Current))
1812 continue;
1813
1814 // For a MemoryDef, check if is aliases any of the location feeding
1815 // the original condition.
1816 if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
1817 if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
1818 return isModSet(
1819 AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
1820 }))
1821 return {};
1822 }
1823
1824 for (Use &U : Current->uses())
1825 AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
1826 }
1827
1828 // We could also allow loops with known trip counts without mustprogress,
1829 // but ScalarEvolution may not be available.
1830 Info.PathIsNoop &= isMustProgress(&L);
1831
1832 // If the path is considered a no-op so far, check if it reaches a
1833 // single exit block without any phis. This ensures no values from the
1834 // loop are used outside of the loop.
1835 if (Info.PathIsNoop) {
1836 for (auto *Exiting : ExitingBlocks) {
1837 if (!Seen.contains(Exiting))
1838 continue;
1839 for (auto *Succ : successors(Exiting)) {
1840 if (L.contains(Succ))
1841 continue;
1842
1843 Info.PathIsNoop &= Succ->phis().empty() &&
1844 (!Info.ExitForPath || Info.ExitForPath == Succ);
1845 if (!Info.PathIsNoop)
1846 break;
1847 assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
1848 "cannot have multiple exit blocks");
1849 Info.ExitForPath = Succ;
1850 }
1851 }
1852 }
1853 if (!Info.ExitForPath)
1854 Info.PathIsNoop = false;
1855
1856 Info.InstToDuplicate = InstToDuplicate;
1857 return Info;
1858 };
1859
1860 // If we branch to the same successor, partial unswitching will not be
1861 // beneficial.
1862 if (TI->getSuccessor(0) == TI->getSuccessor(1))
1863 return {};
1864
1865 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
1866 AccessesToCheck)) {
1867 Info->KnownValue = ConstantInt::getTrue(TI->getContext());
1868 return Info;
1869 }
1870 if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
1871 AccessesToCheck)) {
1872 Info->KnownValue = ConstantInt::getFalse(TI->getContext());
1873 return Info;
1874 }
1875
1876 return {};
1877}
amdgpu AMDGPU Register Bank Select
assume Assume Builder
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseSet and SmallDenseSet classes.
@ Enable
Definition: DwarfDebug.cpp:86
std::string Name
This is the interface for a simple mod/ref and alias analysis over globals.
static const HTTPClientCleanup Cleanup
Definition: HTTPClient.cpp:42
Hexagon Hardware Loops
iv Induction Variable Users
Definition: IVUsers.cpp:48
static cl::opt< ReplaceExitVal > ReplaceExitValue("replexitval", cl::Hidden, cl::init(OnlyCheapRepl), cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), clEnumValN(OnlyCheapRepl, "cheap", "only replace exit value when the cost is cheap"), clEnumValN(UnusedIndVarInLoop, "unusedindvarinloop", "only replace exit value when it is an unused " "induction variable in the loop and has cheap replacement cost"), clEnumValN(NoHardUse, "noharduse", "only replace exit values when loop def likely dead"), clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible")))
#define Check(C,...)
Definition: Lint.cpp:170
static std::optional< uint64_t > getEstimatedTripCount(BranchInst *ExitingBranch, Loop *L, uint64_t &OrigExitWeight)
Return the estimated trip count for any exiting branch which dominates the loop latch.
Definition: LoopUtils.cpp:784
static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I)
Definition: LoopUtils.cpp:1153
static const char * LLVMLoopDisableLICM
Definition: LoopUtils.cpp:55
static bool canLoopBeDeleted(Loop *L, SmallVector< RewritePhi, 8 > &RewritePhiSet)
Definition: LoopUtils.cpp:1194
static const char * LLVMLoopDisableNonforced
Definition: LoopUtils.cpp:54
static MDNode * createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V)
Create MDNode for input string.
Definition: LoopUtils.cpp:203
static BranchInst * getExpectedExitLoopLatchBranch(Loop *L)
Checks if L has an exiting latch branch.
Definition: LoopUtils.cpp:766
static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE, InductionDescriptor &ID)
Checks if it is safe to call InductionDescriptor::isInductionPHI for Phi, and returns true if this Ph...
Definition: LoopUtils.cpp:1248
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, SCEVExpander &Exp)
Expand code for the lower and upper bound of the pointer group CG in TheLoop.
Definition: LoopUtils.cpp:1575
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
This file provides a priority worklist.
This file contains the declarations for profiling metadata utility functions.
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This is the interface for a SCEV-based alias analysis.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file implements a set that has insertion order iteration characteristics.
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
Definition: APInt.h:75
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:186
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:196
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:199
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:279
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:314
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:245
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:718
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:747
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:724
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:722
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:741
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:745
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:743
@ ICMP_NE
not equal
Definition: InstrTypes.h:740
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:934
static Constant * getNegativeZero(Type *Ty)
Definition: Constants.h:292
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:835
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:887
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:842
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:147
Identifies a unique instance of a variable.
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:335
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:297
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2138
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1259
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2008
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1390
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1412
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1276
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2550
A struct for saving information about induction variables.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:70
const BasicBlock * getParent() const
Definition: Instruction.h:90
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1455
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void addLoop(Loop &L)
Definition: LoopPass.cpp:76
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
BlockT * getHeader() const
Definition: LoopInfo.h:105
const LoopT * getOutermostLoop() const
Get the outermost loop in which this loop is contained.
Definition: LoopInfo.h:118
std::vector< Loop * >::const_iterator iterator
Definition: LoopInfo.h:168
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
block_iterator block_end() const
Definition: LoopInfo.h:194
iterator end() const
Definition: LoopInfo.h:172
bool hasNoExitBlocks() const
Return true if this loop does not have any exit blocks.
Definition: LoopInfoImpl.h:95
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:188
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:142
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:114
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
Definition: LoopInfoImpl.h:112
iterator begin() const
Definition: LoopInfo.h:171
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Definition: LoopInfo.h:242
block_iterator block_begin() const
Definition: LoopInfo.h:193
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:158
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:956
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:1049
iterator end() const
Definition: LoopInfo.h:968
void removeBlock(BlockT *BB)
This method completely removes BB from all data structures, including all of the Loop objects it is n...
Definition: LoopInfo.h:1057
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
Definition: LoopInfo.h:1019
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:992
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Definition: LoopInfo.h:1096
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:1140
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Definition: LoopInfo.cpp:876
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
bool isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens=true) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:462
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:66
bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI, bool IgnoreTokens=true) const
Return true if this Loop and all inner subloops are in LCSSA form.
Definition: LoopInfo.cpp:469
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:525
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:501
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:943
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:968
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1291
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1289
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1399
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1297
LLVMContext & getContext() const
Definition: Metadata.h:1107
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:772
A single uniqued string.
Definition: Metadata.h:611
StringRef getString() const
Definition: Metadata.cpp:507
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:497
Tuple of metadata.
Definition: Metadata.h:1328
BasicBlock * getBlock() const
Definition: MemorySSA.h:164
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:986
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1863
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:717
Root of the metadata hierarchy.
Definition: Metadata.h:61
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1759
bool insert(const T &X)
Insert a new element into the PriorityWorklist.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:69
FastMathFlags getFastMathFlags() const
TrackingVH< Value > getRecurrenceStartValue() const
RecurKind getRecurrenceKind() const
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
static bool isSelectCmpRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:44
Legacy wrapper pass to provide the SCEVAAResult object.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LoopDisposition
An enum describing the relationship between a SCEV and a loop.
@ LoopInvariant
The SCEV is loop-invariant.
bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L)
Determine if the SCEV can be evaluated at loop's entry.
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
This class represents the LLVM 'select' instruction.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:290
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
size_type size() const
Definition: SmallPtrSet.h:93
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:389
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:301
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool startswith(StringRef Prefix) const
Definition: StringRef.h:261
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:164
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:331
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:222
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
iterator_range< use_iterator > uses()
Definition: Value.h:376
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
An efficient, type-erasing, non-owning reference to a callable.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:386
std::optional< ElementCount > getOptionalElementCountLoopAttribute(const Loop *TheLoop)
Find a combination of metadata ("llvm.loop.vectorize.width" and "llvm.loop.vectorize....
Definition: LoopUtils.cpp:250
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:450
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1755
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:811
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1735
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
Definition: LoopInfo.cpp:1085
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
Value * createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, Value *Left, Value *Right)
See RecurrenceDescriptor::isSelectCmpPattern for a description of the pattern we are trying to match.
Definition: LoopUtils.cpp:902
std::optional< bool > getOptionalBoolLoopAttribute(const Loop *TheLoop, StringRef Name)
Definition: LoopInfo.cpp:1067
Value * createSimpleTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, Value *Src, RecurKind RdxKind)
Create a target reduction of the given vector.
Definition: LoopUtils.cpp:1025
void appendReversedLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1499
auto successors(const MachineBasicBlock *BB)
Value * createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic target reduction using a recurrence descriptor Desc The target is queried to determi...
Definition: LoopUtils.cpp:1063
void initializeLoopPassPass(PassRegistry &)
Manually defined generic "LoopPass" dependency initialization.
Definition: LoopUtils.cpp:189
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:410
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:721
char & LCSSAID
Definition: LCSSA.cpp:492
char & LoopSimplifyID
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:911
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Definition: LoopUtils.cpp:214
bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned max.
Definition: LoopUtils.cpp:1137
TransformationMode hasVectorizeTransformation(const Loop *L)
Definition: LoopUtils.cpp:391
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1910
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator)
Returns the integer nearest(Numerator / Denominator).
Definition: MathExtras.h:513
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:123
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:484
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
Definition: LoopInfo.cpp:1118
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:373
void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, MemorySSA *MSSA=nullptr)
This function deletes dead loops.
Definition: LoopUtils.cpp:469
Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
Definition: LoopUtils.cpp:945
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:344
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1080
cl::opt< unsigned > SCEVCheapExpansionBudget
TransformationMode hasUnrollTransformation(const Loop *L)
Definition: LoopUtils.cpp:352
TransformationMode hasDistributeTransformation(const Loop *L)
Definition: LoopUtils.cpp:427
void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
Definition: LoopUtils.cpp:686
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:141
void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
Definition: LoopUtils.cpp:1092
unsigned changeToUnreachable(Instruction *I, bool PreserveLCSSA=false, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Insert an unreachable instruction before the specified instruction, making it and the rest of the cod...
Definition: Local.cpp:2244
std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
Definition: LoopInfo.cpp:1089
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK)
Returns the comparison predicate used when expanding a min/max reduction.
Definition: LoopUtils.cpp:883
TransformationMode hasLICMVersioningTransformation(const Loop *L)
Definition: LoopUtils.cpp:437
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:89
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:267
@ TM_Unspecified
The pass can use heuristics to determine whether a transformation should be applied.
Definition: LoopUtils.h:270
@ TM_SuppressedByUser
The transformation must not be applied.
Definition: LoopUtils.h:290
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:284
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:276
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:273
bool hasDisableLICMTransformsHint(const Loop *L)
Look for the loop attribute that disables the LICM transformation heuristics.
Definition: LoopUtils.cpp:348
Value * addRuntimeChecks(Instruction *Loc, Loop *TheLoop, const SmallVectorImpl< RuntimePointerCheck > &PointerChecks, SCEVExpander &Expander)
Add code that checks at runtime if the accessed arrays in PointerChecks overlap.
Definition: LoopUtils.cpp:1613
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:35
bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, unsigned EstimatedLoopInvocationWeight)
Set a loop's branch weight metadata to reflect that loop has EstimatedTripCount iterations and Estima...
Definition: LoopUtils.cpp:829
void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, Loop *RemainderLoop, uint64_t UF)
Set weights for UnrolledLoop and RemainderLoop based on weights for OrigLoop and the following distri...
Definition: LoopUtils.cpp:1471
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:57
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1524
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always negative in loop L.
Definition: LoopUtils.cpp:1112
Value * createSelectCmpTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, Value *Src, const RecurrenceDescriptor &Desc, PHINode *OrigPhi)
Create a target reduction of the given vector Src for a reduction of the kind RecurKind::SelectICmp o...
Definition: LoopUtils.cpp:985
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE)
Check inner loop (L) backedge count is known to be invariant on all iterations of its outer loop.
Definition: LoopUtils.cpp:861
auto predecessors(const MachineBasicBlock *BB)
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition: LoopUtils.cpp:1259
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
Definition: GraphTraits.h:123
Value * addDiffRuntimeChecks(Instruction *Loc, ArrayRef< PointerDiffInfo > Checks, SCEVExpander &Expander, function_ref< Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC)
Definition: LoopUtils.cpp:1668
ReplaceExitVal
Definition: LoopUtils.h:437
@ UnusedIndVarInLoop
Definition: LoopUtils.h:441
@ OnlyCheapRepl
Definition: LoopUtils.h:439
@ AlwaysRepl
Definition: LoopUtils.h:442
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
Definition: LoopUtils.cpp:1707
bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed)
Returns true if S is defined and never is equal to signed/unsigned min.
Definition: LoopUtils.cpp:1126
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE)
Returns true if we can prove that S is defined and always non-negative in loop L.
Definition: LoopUtils.cpp:1119
Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.
Definition: LoopUtils.cpp:920
MDNode * findOptionMDForLoopID(MDNode *LoopID, StringRef Name)
Find and return the loop attribute node for the attribute Name in LoopID.
Definition: LoopInfo.cpp:1017
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
Definition: LoopUtils.cpp:1541
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
#define N
IR Values for the lower and upper bounds of a pointer evolution.
Definition: LoopUtils.cpp:1568
TrackingVH< Value > Start
Definition: LoopUtils.cpp:1569
TrackingVH< Value > End
Definition: LoopUtils.cpp:1570
unsigned Ith
Definition: LoopUtils.cpp:1180
RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, bool H)
Definition: LoopUtils.cpp:1185
const SCEV * ExpansionSCEV
Definition: LoopUtils.cpp:1181
PHINode * PN
Definition: LoopUtils.cpp:1179
Instruction * ExpansionPoint
Definition: LoopUtils.cpp:1182
Struct to hold information about a partially invariant condition.
Definition: LoopUtils.h:515
unsigned AddressSpace
Address space of the involved pointers.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.