LLVM 23.0.0git
HardwareLoops.cpp
Go to the documentation of this file.
1//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Insert hardware loop intrinsics into loops which are deemed profitable by
10/// the target, by querying TargetTransformInfo. A hardware loop comprises
11/// two intrinsics: one, outside the loop, to set the loop iteration count and
12/// another, in the exit block, to decrement the counter. The decremented value
13/// can either be carried through the loop via a phi or handled in some opaque
14/// way by the target.
15///
16//===----------------------------------------------------------------------===//
17
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
44
45#define DEBUG_TYPE "hardware-loops"
46
47#define HW_LOOPS_NAME "Hardware Loop Insertion"
48
49using namespace llvm;
50
51static cl::opt<bool>
52ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
53 cl::desc("Force hardware loops intrinsics to be inserted"));
54
55static cl::opt<bool>
57 "force-hardware-loop-phi", cl::Hidden, cl::init(false),
58 cl::desc("Force hardware loop counter to be updated through a phi"));
59
60static cl::opt<bool>
61ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
62 cl::desc("Force allowance of nested hardware loops"));
63
65LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
66 cl::desc("Set the loop decrement value"));
67
69CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
70 cl::desc("Set the loop counter bitwidth"));
71
72static cl::opt<bool>
74 "force-hardware-loop-guard", cl::Hidden, cl::init(false),
75 cl::desc("Force generation of loop guard intrinsic"));
76
77STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
78
79#ifndef NDEBUG
80static void debugHWLoopFailure(const StringRef DebugMsg,
81 Instruction *I) {
82 dbgs() << "HWLoops: " << DebugMsg;
83 if (I)
84 dbgs() << ' ' << *I;
85 else
86 dbgs() << '.';
87 dbgs() << '\n';
88}
89#endif
90
93 BasicBlock *CodeRegion = L->getHeader();
94 DebugLoc DL = L->getStartLoc();
95
96 if (I) {
97 CodeRegion = I->getParent();
98 // If there is no debug location attached to the instruction, revert back to
99 // using the loop's.
100 if (I->getDebugLoc())
101 DL = I->getDebugLoc();
102 }
103
104 OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
105 R << "hardware-loop not created: ";
106 return R;
107}
108
109namespace {
110
111 void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
112 OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
114 ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
115 }
116
117 using TTI = TargetTransformInfo;
118
119 class HardwareLoopsLegacy : public FunctionPass {
120 public:
121 static char ID;
122
123 HardwareLoopsLegacy() : FunctionPass(ID) {}
124
125 bool runOnFunction(Function &F) override;
126
127 void getAnalysisUsage(AnalysisUsage &AU) const override {
128 AU.addRequired<LoopInfoWrapperPass>();
129 AU.addPreserved<LoopInfoWrapperPass>();
130 AU.addRequired<DominatorTreeWrapperPass>();
131 AU.addPreserved<DominatorTreeWrapperPass>();
132 AU.addRequired<ScalarEvolutionWrapperPass>();
133 AU.addPreserved<ScalarEvolutionWrapperPass>();
134 AU.addRequired<AssumptionCacheTracker>();
135 AU.addRequired<TargetTransformInfoWrapperPass>();
136 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
137 AU.addPreserved<BranchProbabilityInfoWrapperPass>();
138 }
139 };
140
141 class HardwareLoopsImpl {
142 public:
143 HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
144 DominatorTree &DT, const TargetTransformInfo &TTI,
145 TargetLibraryInfo *TLI, AssumptionCache &AC,
146 OptimizationRemarkEmitter *ORE, HardwareLoopOptions &Opts)
147 : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), TTI(TTI),
148 TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) {}
149
150 bool run(Function &F);
151
152 private:
153 // Try to convert the given Loop into a hardware loop.
154 bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
155
156 // Given that the target believes the loop to be profitable, try to
157 // convert it.
158 bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
159
160 ScalarEvolution &SE;
161 LoopInfo &LI;
162 bool PreserveLCSSA;
163 DominatorTree &DT;
164 const TargetTransformInfo &TTI;
165 TargetLibraryInfo *TLI = nullptr;
166 AssumptionCache &AC;
167 OptimizationRemarkEmitter *ORE;
168 HardwareLoopOptions &Opts;
169 bool MadeChange = false;
170 };
171
172 class HardwareLoop {
173 // Expand the trip count scev into a value that we can use.
174 Value *InitLoopCount();
175
176 // Insert the set_loop_iteration intrinsic.
177 Value *InsertIterationSetup(Value *LoopCountInit);
178
179 // Insert the loop_decrement intrinsic.
180 void InsertLoopDec();
181
182 // Insert the loop_decrement_reg intrinsic.
183 Instruction *InsertLoopRegDec(Value *EltsRem);
184
185 // If the target requires the counter value to be updated in the loop,
186 // insert a phi to hold the value. The intended purpose is for use by
187 // loop_decrement_reg.
188 PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
189
190 // Create a new cmp, that checks the returned value of loop_decrement*,
191 // and update the exit branch to use it.
192 void UpdateBranch(Value *EltsRem);
193
194 public:
195 HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
196 OptimizationRemarkEmitter *ORE, HardwareLoopOptions &Opts)
197 : SE(SE), ORE(ORE), Opts(Opts), L(Info.L),
198 M(L->getHeader()->getModule()), ExitCount(Info.ExitCount),
199 CountType(Info.CountType), ExitBranch(Info.ExitBranch),
200 LoopDecrement(Info.LoopDecrement), UsePHICounter(Info.CounterInReg),
201 UseLoopGuard(Info.PerformEntryTest) {}
202
203 void Create();
204
205 private:
206 ScalarEvolution &SE;
207 OptimizationRemarkEmitter *ORE = nullptr;
208 HardwareLoopOptions &Opts;
209 Loop *L = nullptr;
210 Module *M = nullptr;
211 const SCEV *ExitCount = nullptr;
212 Type *CountType = nullptr;
213 BranchInst *ExitBranch = nullptr;
214 Value *LoopDecrement = nullptr;
215 bool UsePHICounter = false;
216 bool UseLoopGuard = false;
217 BasicBlock *BeginBB = nullptr;
218 };
219}
220
221char HardwareLoopsLegacy::ID = 0;
222
223bool HardwareLoopsLegacy::runOnFunction(Function &F) {
224 if (skipFunction(F))
225 return false;
226
227 LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
228
229 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
230 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
231 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
232 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
233 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
234 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
235 auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
236 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
237 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
238
239 HardwareLoopOptions Opts;
248 if (LoopDecrement.getNumOccurrences())
250 if (CounterBitWidth.getNumOccurrences())
252
253 HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, TTI, TLI, AC, ORE, Opts);
254 return Impl.run(F);
255}
256
259 auto &LI = AM.getResult<LoopAnalysis>(F);
260 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
261 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
262 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
263 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
264 auto &AC = AM.getResult<AssumptionAnalysis>(F);
266
267 HardwareLoopsImpl Impl(SE, LI, true, DT, TTI, TLI, AC, ORE, Opts);
268 bool Changed = Impl.run(F);
269 if (!Changed)
270 return PreservedAnalyses::all();
271
277 return PA;
278}
279
280bool HardwareLoopsImpl::run(Function &F) {
281 LLVMContext &Ctx = F.getContext();
282 for (Loop *L : LI)
283 if (L->isOutermost())
284 TryConvertLoop(L, Ctx);
285 return MadeChange;
286}
287
288// Return true if the search should stop, which will be when an inner loop is
289// converted and the parent loop doesn't support containing a hardware loop.
290bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
291 // Process nested loops first.
292 bool AnyChanged = false;
293 for (Loop *SL : *L)
294 AnyChanged |= TryConvertLoop(SL, Ctx);
295 if (AnyChanged) {
296 reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
297 ORE, L);
298 return true; // Stop search.
299 }
300
301 LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
302
303 HardwareLoopInfo HWLoopInfo(L);
304 if (!HWLoopInfo.canAnalyze(LI)) {
305 reportHWLoopFailure("cannot analyze loop, irreducible control flow",
306 "HWLoopCannotAnalyze", ORE, L);
307 return false;
308 }
309
310 if (!Opts.Force &&
311 !TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
312 reportHWLoopFailure("it's not profitable to create a hardware-loop",
313 "HWLoopNotProfitable", ORE, L);
314 return false;
315 }
316
317 // Allow overriding of the counter width and loop decrement value.
318 if (Opts.Bitwidth.has_value()) {
319 HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value());
320 }
321
322 if (Opts.Decrement.has_value())
323 HWLoopInfo.LoopDecrement =
324 ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());
325
326 MadeChange |= TryConvertLoop(HWLoopInfo);
327 return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
328}
329
330bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
331
332 Loop *L = HWLoopInfo.L;
333 LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
334
335 if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(),
336 Opts.getForcePhi())) {
337 // TODO: there can be many reasons a loop is not considered a
338 // candidate, so we should let isHardwareLoopCandidate fill in the
339 // reason and then report a better message here.
340 reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
341 return false;
342 }
343
344 assert(
345 (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
346 "Hardware Loop must have set exit info.");
347
348 BasicBlock *Preheader = L->getLoopPreheader();
349
350 // If we don't have a preheader, then insert one.
351 if (!Preheader)
352 Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA);
353 if (!Preheader)
354 return false;
355
356 HardwareLoop HWLoop(HWLoopInfo, SE, ORE, Opts);
357 HWLoop.Create();
358 ++NumHWLoops;
359 return true;
360}
361
362void HardwareLoop::Create() {
363 LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
364
365 Value *LoopCountInit = InitLoopCount();
366 if (!LoopCountInit) {
367 reportHWLoopFailure("could not safely create a loop count expression",
368 "HWLoopNotSafe", ORE, L);
369 return;
370 }
371
372 Value *Setup = InsertIterationSetup(LoopCountInit);
373
374 if (UsePHICounter || Opts.ForcePhi) {
375 Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
376 Value *EltsRem = InsertPHICounter(Setup, LoopDec);
377 LoopDec->setOperand(0, EltsRem);
378 UpdateBranch(LoopDec);
379 } else
380 InsertLoopDec();
381
382 // Run through the basic blocks of the loop and see if any of them have dead
383 // PHIs that can be removed.
384 for (auto *I : L->blocks())
386}
387
388static bool CanGenerateTest(Loop *L, Value *Count) {
389 BasicBlock *Preheader = L->getLoopPreheader();
390 if (!Preheader->getSinglePredecessor())
391 return false;
392
393 BasicBlock *Pred = Preheader->getSinglePredecessor();
394 if (!isa<BranchInst>(Pred->getTerminator()))
395 return false;
396
397 auto *BI = cast<BranchInst>(Pred->getTerminator());
398 if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
399 return false;
400
401 // Check that the icmp is checking for equality of Count and zero and that
402 // a non-zero value results in entering the loop.
403 auto ICmp = cast<ICmpInst>(BI->getCondition());
404 LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
405 if (!ICmp->isEquality())
406 return false;
407
408 auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
409 if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
410 return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
411 return false;
412 };
413
414 // Check if Count is a zext.
415 Value *CountBefZext =
416 isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr;
417
418 if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
419 !IsCompareZero(ICmp, CountBefZext, 0) &&
420 !IsCompareZero(ICmp, CountBefZext, 1))
421 return false;
422
423 unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
424 if (BI->getSuccessor(SuccIdx) != Preheader)
425 return false;
426
427 return true;
428}
429
430Value *HardwareLoop::InitLoopCount() {
431 LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
432 // Can we replace a conditional branch with an intrinsic that sets the
433 // loop counter and tests that is not zero?
434
435 SCEVExpander SCEVE(SE, "loopcnt");
436 if (!ExitCount->getType()->isPointerTy() &&
437 ExitCount->getType() != CountType)
438 ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
439
440 ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
441
442 // If we're trying to use the 'test and set' form of the intrinsic, we need
443 // to replace a conditional branch that is controlling entry to the loop. It
444 // is likely (guaranteed?) that the preheader has an unconditional branch to
445 // the loop header, so also check if it has a single predecessor.
446 if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
447 SE.getZero(ExitCount->getType()))) {
448 LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
449 if (Opts.ForceGuard)
450 UseLoopGuard = true;
451 } else
452 UseLoopGuard = false;
453
454 BasicBlock *BB = L->getLoopPreheader();
455 if (UseLoopGuard && BB->getSinglePredecessor() &&
456 cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
457 BasicBlock *Predecessor = BB->getSinglePredecessor();
458 // If it's not safe to create a while loop then don't force it and create a
459 // do-while loop instead
460 if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator()))
461 UseLoopGuard = false;
462 else
463 BB = Predecessor;
464 }
465
466 if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) {
467 LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
468 << *ExitCount << "\n");
469 return nullptr;
470 }
471
472 Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
473 BB->getTerminator());
474
475 // FIXME: We've expanded Count where we hope to insert the counter setting
476 // intrinsic. But, in the case of the 'test and set' form, we may fallback to
477 // the just 'set' form and in which case the insertion block is most likely
478 // different. It means there will be instruction(s) in a block that possibly
479 // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
480 // but it's doesn't appear to work in all cases.
481
482 UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
483 BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
484 LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
485 << " - Expanded Count in " << BB->getName() << "\n"
486 << " - Will insert set counter intrinsic into: "
487 << BeginBB->getName() << "\n");
488 return Count;
489}
490
491Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
492 IRBuilder<> Builder(BeginBB->getTerminator());
493 if (BeginBB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
494 Builder.setIsFPConstrained(true);
495 Type *Ty = LoopCountInit->getType();
496 bool UsePhi = UsePHICounter || Opts.ForcePhi;
497 Intrinsic::ID ID = UseLoopGuard
498 ? (UsePhi ? Intrinsic::test_start_loop_iterations
499 : Intrinsic::test_set_loop_iterations)
500 : (UsePhi ? Intrinsic::start_loop_iterations
501 : Intrinsic::set_loop_iterations);
502 Value *LoopSetup = Builder.CreateIntrinsic(ID, Ty, LoopCountInit);
503
504 // Use the return value of the intrinsic to control the entry of the loop.
505 if (UseLoopGuard) {
506 assert((isa<BranchInst>(BeginBB->getTerminator()) &&
507 cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
508 "Expected conditional branch");
509
510 Value *SetCount =
511 UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
512 auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
513 LoopGuard->setCondition(SetCount);
514 if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
515 LoopGuard->swapSuccessors();
516 }
517 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
518 << "\n");
519 if (UsePhi && UseLoopGuard)
520 LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
521 return !UsePhi ? LoopCountInit : LoopSetup;
522}
523
524void HardwareLoop::InsertLoopDec() {
525 IRBuilder<> CondBuilder(ExitBranch);
526 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
527 Attribute::StrictFP))
528 CondBuilder.setIsFPConstrained(true);
529
530 Value *Ops[] = { LoopDecrement };
531 Value *NewCond = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement,
532 LoopDecrement->getType(), Ops);
533 Value *OldCond = ExitBranch->getCondition();
534 ExitBranch->setCondition(NewCond);
535
536 // The false branch must exit the loop.
537 if (!L->contains(ExitBranch->getSuccessor(0)))
538 ExitBranch->swapSuccessors();
539
540 // The old condition may be dead now, and may have even created a dead PHI
541 // (the original induction variable).
543
544 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
545}
546
547Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
548 IRBuilder<> CondBuilder(ExitBranch);
549 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
550 Attribute::StrictFP))
551 CondBuilder.setIsFPConstrained(true);
552
553 Value *Ops[] = { EltsRem, LoopDecrement };
554 Value *Call = CondBuilder.CreateIntrinsic(Intrinsic::loop_decrement_reg,
555 {EltsRem->getType()}, Ops);
556
557 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
558 return cast<Instruction>(Call);
559}
560
561PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
562 BasicBlock *Preheader = L->getLoopPreheader();
563 BasicBlock *Header = L->getHeader();
564 BasicBlock *Latch = ExitBranch->getParent();
565 IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());
566 PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
567 Index->addIncoming(NumElts, Preheader);
568 Index->addIncoming(EltsRem, Latch);
569 LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
570 return Index;
571}
572
573void HardwareLoop::UpdateBranch(Value *EltsRem) {
574 IRBuilder<> CondBuilder(ExitBranch);
575 Value *NewCond =
576 CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
577 Value *OldCond = ExitBranch->getCondition();
578 ExitBranch->setCondition(NewCond);
579
580 // The false branch must exit the loop.
581 if (!L->contains(ExitBranch->getSuccessor(0)))
582 ExitBranch->swapSuccessors();
583
584 // The old condition may be dead now, and may have even created a dead PHI
585 // (the original induction variable).
587}
588
589INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
590INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
591INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
592INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
593INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
594INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
595
596FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
#define HW_LOOPS_NAME
static cl::opt< unsigned > CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), cl::desc("Set the loop counter bitwidth"))
static OptimizationRemarkAnalysis createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I)
static cl::opt< bool > ForceGuardLoopEntry("force-hardware-loop-guard", cl::Hidden, cl::init(false), cl::desc("Force generation of loop guard intrinsic"))
static void debugHWLoopFailure(const StringRef DebugMsg, Instruction *I)
static cl::opt< unsigned > LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), cl::desc("Set the loop decrement value"))
static cl::opt< bool > ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), cl::desc("Force hardware loops intrinsics to be inserted"))
static bool CanGenerateTest(Loop *L, Value *Count)
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
Defines an IR pass for the creation of hardware loops.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void setCondition(Value *V)
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
A debug info location.
Definition DebugLoc.h:123
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This instruction compares its operands according to the predicate given to the constructor.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI bool isLoopEntryGuardedByCond(const Loop *L, CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
int getNumOccurrences() const
const ParentTy * getParent() const
Definition ilist_node.h:34
CallInst * Call
Changed
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI char & LCSSAID
Definition LCSSA.cpp:526
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createHardwareLoopsLegacyPass()
Create Hardware Loop pass.
LLVM_ABI bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
std::optional< bool > Force
HardwareLoopOptions & setForceNested(bool Force)
std::optional< bool > ForceGuard
std::optional< unsigned > Decrement
HardwareLoopOptions & setDecrement(unsigned Count)
HardwareLoopOptions & setForceGuard(bool Force)
HardwareLoopOptions & setForce(bool Force)
HardwareLoopOptions & setCounterBitwidth(unsigned Width)
std::optional< unsigned > Bitwidth
HardwareLoopOptions & setForcePhi(bool Force)
std::optional< bool > ForcePhi
std::optional< bool > ForceNested