LLVM  16.0.0git
HardwareLoops.cpp
Go to the documentation of this file.
1 //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// Insert hardware loop intrinsics into loops which are deemed profitable by
10 /// the target, by querying TargetTransformInfo. A hardware loop comprises
11 /// two intrinsics: one, outside the loop, to set the loop iteration count and
12 /// another, in the exit block, to decrement the counter. The decremented value
13 /// can either be carried through the loop via a phi or handled in some opaque
14 /// way by the target.
15 ///
16 //===----------------------------------------------------------------------===//
17 
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
43 
44 #define DEBUG_TYPE "hardware-loops"
45 
46 #define HW_LOOPS_NAME "Hardware Loop Insertion"
47 
48 using namespace llvm;
49 
50 static cl::opt<bool>
51 ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
52  cl::desc("Force hardware loops intrinsics to be inserted"));
53 
54 static cl::opt<bool>
56  "force-hardware-loop-phi", cl::Hidden, cl::init(false),
57  cl::desc("Force hardware loop counter to be updated through a phi"));
58 
59 static cl::opt<bool>
60 ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
61  cl::desc("Force allowance of nested hardware loops"));
62 
63 static cl::opt<unsigned>
64 LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
65  cl::desc("Set the loop decrement value"));
66 
67 static cl::opt<unsigned>
68 CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
69  cl::desc("Set the loop counter bitwidth"));
70 
71 static cl::opt<bool>
73  "force-hardware-loop-guard", cl::Hidden, cl::init(false),
74  cl::desc("Force generation of loop guard intrinsic"));
75 
76 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
77 
78 #ifndef NDEBUG
79 static void debugHWLoopFailure(const StringRef DebugMsg,
80  Instruction *I) {
81  dbgs() << "HWLoops: " << DebugMsg;
82  if (I)
83  dbgs() << ' ' << *I;
84  else
85  dbgs() << '.';
86  dbgs() << '\n';
87 }
88 #endif
89 
92  Value *CodeRegion = L->getHeader();
93  DebugLoc DL = L->getStartLoc();
94 
95  if (I) {
96  CodeRegion = I->getParent();
97  // If there is no debug location attached to the instruction, revert back to
98  // using the loop's.
99  if (I->getDebugLoc())
100  DL = I->getDebugLoc();
101  }
102 
103  OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
104  R << "hardware-loop not created: ";
105  return R;
106 }
107 
108 namespace {
109 
110  void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
111  OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
113  ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
114  }
115 
116  using TTI = TargetTransformInfo;
117 
118  class HardwareLoops : public FunctionPass {
119  public:
120  static char ID;
121 
122  HardwareLoops() : FunctionPass(ID) {
124  }
125 
126  bool runOnFunction(Function &F) override;
127 
128  void getAnalysisUsage(AnalysisUsage &AU) const override {
137  }
138 
139  // Try to convert the given Loop into a hardware loop.
140  bool TryConvertLoop(Loop *L);
141 
142  // Given that the target believes the loop to be profitable, try to
143  // convert it.
144  bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
145 
146  private:
147  ScalarEvolution *SE = nullptr;
148  LoopInfo *LI = nullptr;
149  const DataLayout *DL = nullptr;
150  OptimizationRemarkEmitter *ORE = nullptr;
151  const TargetTransformInfo *TTI = nullptr;
152  DominatorTree *DT = nullptr;
153  bool PreserveLCSSA = false;
154  AssumptionCache *AC = nullptr;
155  TargetLibraryInfo *LibInfo = nullptr;
156  Module *M = nullptr;
157  bool MadeChange = false;
158  };
159 
160  class HardwareLoop {
161  // Expand the trip count scev into a value that we can use.
162  Value *InitLoopCount();
163 
164  // Insert the set_loop_iteration intrinsic.
165  Value *InsertIterationSetup(Value *LoopCountInit);
166 
167  // Insert the loop_decrement intrinsic.
168  void InsertLoopDec();
169 
170  // Insert the loop_decrement_reg intrinsic.
171  Instruction *InsertLoopRegDec(Value *EltsRem);
172 
173  // If the target requires the counter value to be updated in the loop,
174  // insert a phi to hold the value. The intended purpose is for use by
175  // loop_decrement_reg.
176  PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
177 
178  // Create a new cmp, that checks the returned value of loop_decrement*,
179  // and update the exit branch to use it.
180  void UpdateBranch(Value *EltsRem);
181 
182  public:
183  HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
184  const DataLayout &DL,
186  SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
187  ExitCount(Info.ExitCount),
188  CountType(Info.CountType),
189  ExitBranch(Info.ExitBranch),
191  UsePHICounter(Info.CounterInReg),
192  UseLoopGuard(Info.PerformEntryTest) { }
193 
194  void Create();
195 
196  private:
197  ScalarEvolution &SE;
198  const DataLayout &DL;
199  OptimizationRemarkEmitter *ORE = nullptr;
200  Loop *L = nullptr;
201  Module *M = nullptr;
202  const SCEV *ExitCount = nullptr;
203  Type *CountType = nullptr;
204  BranchInst *ExitBranch = nullptr;
205  Value *LoopDecrement = nullptr;
206  bool UsePHICounter = false;
207  bool UseLoopGuard = false;
208  BasicBlock *BeginBB = nullptr;
209  };
210 }
211 
212 char HardwareLoops::ID = 0;
213 
215  if (skipFunction(F))
216  return false;
217 
218  LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
219 
220  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
221  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
222  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
223  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
224  DL = &F.getParent()->getDataLayout();
225  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
226  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
227  LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
228  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
229  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
230  M = F.getParent();
231 
232  for (Loop *L : *LI)
233  if (L->isOutermost())
234  TryConvertLoop(L);
235 
236  return MadeChange;
237 }
238 
239 // Return true if the search should stop, which will be when an inner loop is
240 // converted and the parent loop doesn't support containing a hardware loop.
241 bool HardwareLoops::TryConvertLoop(Loop *L) {
242  // Process nested loops first.
243  bool AnyChanged = false;
244  for (Loop *SL : *L)
245  AnyChanged |= TryConvertLoop(SL);
246  if (AnyChanged) {
247  reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
248  ORE, L);
249  return true; // Stop search.
250  }
251 
252  LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
253 
254  HardwareLoopInfo HWLoopInfo(L);
255  if (!HWLoopInfo.canAnalyze(*LI)) {
256  reportHWLoopFailure("cannot analyze loop, irreducible control flow",
257  "HWLoopCannotAnalyze", ORE, L);
258  return false;
259  }
260 
261  if (!ForceHardwareLoops &&
262  !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
263  reportHWLoopFailure("it's not profitable to create a hardware-loop",
264  "HWLoopNotProfitable", ORE, L);
265  return false;
266  }
267 
268  // Allow overriding of the counter width and loop decrement value.
269  if (CounterBitWidth.getNumOccurrences())
270  HWLoopInfo.CountType =
271  IntegerType::get(M->getContext(), CounterBitWidth);
272 
273  if (LoopDecrement.getNumOccurrences())
274  HWLoopInfo.LoopDecrement =
275  ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
276 
277  MadeChange |= TryConvertLoop(HWLoopInfo);
278  return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
279 }
280 
281 bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
282 
283  Loop *L = HWLoopInfo.L;
284  LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
285 
286  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
288  // TODO: there can be many reasons a loop is not considered a
289  // candidate, so we should let isHardwareLoopCandidate fill in the
290  // reason and then report a better message here.
291  reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
292  return false;
293  }
294 
295  assert(
296  (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
297  "Hardware Loop must have set exit info.");
298 
299  BasicBlock *Preheader = L->getLoopPreheader();
300 
301  // If we don't have a preheader, then insert one.
302  if (!Preheader)
303  Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
304  if (!Preheader)
305  return false;
306 
307  HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
308  HWLoop.Create();
309  ++NumHWLoops;
310  return true;
311 }
312 
313 void HardwareLoop::Create() {
314  LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
315 
316  Value *LoopCountInit = InitLoopCount();
317  if (!LoopCountInit) {
318  reportHWLoopFailure("could not safely create a loop count expression",
319  "HWLoopNotSafe", ORE, L);
320  return;
321  }
322 
323  Value *Setup = InsertIterationSetup(LoopCountInit);
324 
325  if (UsePHICounter || ForceHardwareLoopPHI) {
326  Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
327  Value *EltsRem = InsertPHICounter(Setup, LoopDec);
328  LoopDec->setOperand(0, EltsRem);
329  UpdateBranch(LoopDec);
330  } else
331  InsertLoopDec();
332 
333  // Run through the basic blocks of the loop and see if any of them have dead
334  // PHIs that can be removed.
335  for (auto *I : L->blocks())
336  DeleteDeadPHIs(I);
337 }
338 
339 static bool CanGenerateTest(Loop *L, Value *Count) {
340  BasicBlock *Preheader = L->getLoopPreheader();
341  if (!Preheader->getSinglePredecessor())
342  return false;
343 
344  BasicBlock *Pred = Preheader->getSinglePredecessor();
345  if (!isa<BranchInst>(Pred->getTerminator()))
346  return false;
347 
348  auto *BI = cast<BranchInst>(Pred->getTerminator());
349  if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
350  return false;
351 
352  // Check that the icmp is checking for equality of Count and zero and that
353  // a non-zero value results in entering the loop.
354  auto ICmp = cast<ICmpInst>(BI->getCondition());
355  LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
356  if (!ICmp->isEquality())
357  return false;
358 
359  auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
360  if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
361  return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
362  return false;
363  };
364 
365  // Check if Count is a zext.
366  Value *CountBefZext =
367  isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr;
368 
369  if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
370  !IsCompareZero(ICmp, CountBefZext, 0) &&
371  !IsCompareZero(ICmp, CountBefZext, 1))
372  return false;
373 
374  unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
375  if (BI->getSuccessor(SuccIdx) != Preheader)
376  return false;
377 
378  return true;
379 }
380 
381 Value *HardwareLoop::InitLoopCount() {
382  LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
383  // Can we replace a conditional branch with an intrinsic that sets the
384  // loop counter and tests that is not zero?
385 
386  SCEVExpander SCEVE(SE, DL, "loopcnt");
387  if (!ExitCount->getType()->isPointerTy() &&
388  ExitCount->getType() != CountType)
389  ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
390 
391  ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
392 
393  // If we're trying to use the 'test and set' form of the intrinsic, we need
394  // to replace a conditional branch that is controlling entry to the loop. It
395  // is likely (guaranteed?) that the preheader has an unconditional branch to
396  // the loop header, so also check if it has a single predecessor.
397  if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
398  SE.getZero(ExitCount->getType()))) {
399  LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
400  UseLoopGuard |= ForceGuardLoopEntry;
401  } else
402  UseLoopGuard = false;
403 
405  if (UseLoopGuard && BB->getSinglePredecessor() &&
406  cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
407  BasicBlock *Predecessor = BB->getSinglePredecessor();
408  // If it's not safe to create a while loop then don't force it and create a
409  // do-while loop instead
410  if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator()))
411  UseLoopGuard = false;
412  else
413  BB = Predecessor;
414  }
415 
416  if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) {
417  LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
418  << *ExitCount << "\n");
419  return nullptr;
420  }
421 
422  Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
423  BB->getTerminator());
424 
425  // FIXME: We've expanded Count where we hope to insert the counter setting
426  // intrinsic. But, in the case of the 'test and set' form, we may fallback to
427  // the just 'set' form and in which case the insertion block is most likely
428  // different. It means there will be instruction(s) in a block that possibly
429  // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
430  // but it's doesn't appear to work in all cases.
431 
432  UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
433  BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
434  LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
435  << " - Expanded Count in " << BB->getName() << "\n"
436  << " - Will insert set counter intrinsic into: "
437  << BeginBB->getName() << "\n");
438  return Count;
439 }
440 
441 Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
442  IRBuilder<> Builder(BeginBB->getTerminator());
443  Type *Ty = LoopCountInit->getType();
444  bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
445  Intrinsic::ID ID = UseLoopGuard
446  ? (UsePhi ? Intrinsic::test_start_loop_iterations
447  : Intrinsic::test_set_loop_iterations)
448  : (UsePhi ? Intrinsic::start_loop_iterations
449  : Intrinsic::set_loop_iterations);
450  Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
451  Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit);
452 
453  // Use the return value of the intrinsic to control the entry of the loop.
454  if (UseLoopGuard) {
455  assert((isa<BranchInst>(BeginBB->getTerminator()) &&
456  cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
457  "Expected conditional branch");
458 
459  Value *SetCount =
460  UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
461  auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
462  LoopGuard->setCondition(SetCount);
463  if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
464  LoopGuard->swapSuccessors();
465  }
466  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
467  << "\n");
468  if (UsePhi && UseLoopGuard)
469  LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
470  return !UsePhi ? LoopCountInit : LoopSetup;
471 }
472 
473 void HardwareLoop::InsertLoopDec() {
474  IRBuilder<> CondBuilder(ExitBranch);
475 
476  Function *DecFunc =
477  Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
478  LoopDecrement->getType());
479  Value *Ops[] = { LoopDecrement };
480  Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
481  Value *OldCond = ExitBranch->getCondition();
482  ExitBranch->setCondition(NewCond);
483 
484  // The false branch must exit the loop.
485  if (!L->contains(ExitBranch->getSuccessor(0)))
486  ExitBranch->swapSuccessors();
487 
488  // The old condition may be dead now, and may have even created a dead PHI
489  // (the original induction variable).
491 
492  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
493 }
494 
495 Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
496  IRBuilder<> CondBuilder(ExitBranch);
497 
498  Function *DecFunc =
499  Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
500  { EltsRem->getType() });
501  Value *Ops[] = { EltsRem, LoopDecrement };
502  Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
503 
504  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
505  return cast<Instruction>(Call);
506 }
507 
508 PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
509  BasicBlock *Preheader = L->getLoopPreheader();
510  BasicBlock *Header = L->getHeader();
511  BasicBlock *Latch = ExitBranch->getParent();
512  IRBuilder<> Builder(Header->getFirstNonPHI());
513  PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
514  Index->addIncoming(NumElts, Preheader);
515  Index->addIncoming(EltsRem, Latch);
516  LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
517  return Index;
518 }
519 
520 void HardwareLoop::UpdateBranch(Value *EltsRem) {
521  IRBuilder<> CondBuilder(ExitBranch);
522  Value *NewCond =
523  CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
524  Value *OldCond = ExitBranch->getCondition();
525  ExitBranch->setCondition(NewCond);
526 
527  // The false branch must exit the loop.
528  if (!L->contains(ExitBranch->getSuccessor(0)))
529  ExitBranch->swapSuccessors();
530 
531  // The old condition may be dead now, and may have even created a dead PHI
532  // (the original induction variable).
534 }
535 
536 INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
542 
543 FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
llvm::TargetTransformInfo::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
Definition: TargetTransformInfo.cpp:295
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:520
AssumptionCache.h
CounterBitWidth
static cl::opt< unsigned > CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), cl::desc("Set the loop counter bitwidth"))
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
ScalarEvolutionExpander.h
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:50
Statistic.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
llvm::IRBuilder<>
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:631
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:742
Local.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:102
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:106
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
llvm::HardwareLoopInfo::L
Loop * L
Definition: TargetTransformInfo.h:100
ForceNestedLoop
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
LoopDecrement
static cl::opt< unsigned > LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), cl::desc("Set the loop decrement value"))
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:285
PassRegistry.h
createHWLoopAnalysis
static OptimizationRemarkAnalysis createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I)
Definition: HardwareLoops.cpp:91
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CommandLine.h
llvm::HardwareLoopInfo::ExitCount
const SCEV * ExitCount
Definition: TargetTransformInfo.h:103
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
Constants.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:141
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:306
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
LoopUtils.h
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2187
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:155
Utils.h
ForceHardwareLoops
static cl::opt< bool > ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), cl::desc("Force hardware loops intrinsics to be inserted"))
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
Passes.h
llvm::InsertPreheaderForLoop
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
Definition: LoopSimplify.cpp:118
debugHWLoopFailure
static void debugHWLoopFailure(const StringRef DebugMsg, Instruction *I)
Definition: HardwareLoops.cpp:79
BasicBlock.h
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1186
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2648
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::initializeHardwareLoopsPass
void initializeHardwareLoopsPass(PassRegistry &)
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
ForceGuardLoopEntry
static cl::opt< bool > ForceGuardLoopEntry("force-hardware-loop-guard", cl::Hidden, cl::init(false), cl::desc("Force generation of loop guard intrinsic"))
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
CanGenerateTest
static bool CanGenerateTest(Loop *L, Value *Count)
Definition: HardwareLoops.cpp:339
HW_LOOPS_NAME
#define HW_LOOPS_NAME
Definition: HardwareLoops.cpp:46
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:66
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::createHardwareLoopsPass
FunctionPass * createHardwareLoopsPass()
Create Hardware Loop pass.
Definition: HardwareLoops.cpp:543
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:780
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1481
llvm::AMDGPU::SendMsg::Msg
const CustomOperand< const MCSubtargetInfo & > Msg[]
Definition: AMDGPUAsmUtils.cpp:39
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:85
llvm::LCSSAID
char & LCSSAID
Definition: LCSSA.cpp:492
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::HardwareLoopInfo::ExitBlock
BasicBlock * ExitBlock
Definition: TargetTransformInfo.h:101
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:226
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:163
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:97
DEBUG_TYPE
#define DEBUG_TYPE
Definition: HardwareLoops.cpp:44
Instructions.h
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
Dominators.h
llvm::orc::SimpleRemoteEPCOpcode::Setup
@ Setup
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:810
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2697
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
ForceHardwareLoopPHI
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:413
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3132
BasicBlockUtils.h
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39