LLVM  14.0.0git
HardwareLoops.cpp
Go to the documentation of this file.
1 //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// Insert hardware loop intrinsics into loops which are deemed profitable by
10 /// the target, by querying TargetTransformInfo. A hardware loop comprises
11 /// two intrinsics: one, outside the loop, to set the loop iteration count and
12 /// another, in the exit block, to decrement the counter. The decremented value
13 /// can either be carried through the loop via a phi or handled in some opaque
14 /// way by the target.
15 ///
16 //===----------------------------------------------------------------------===//
17 
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
46 
47 #define DEBUG_TYPE "hardware-loops"
48 
49 #define HW_LOOPS_NAME "Hardware Loop Insertion"
50 
51 using namespace llvm;
52 
53 static cl::opt<bool>
54 ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
55  cl::desc("Force hardware loops intrinsics to be inserted"));
56 
57 static cl::opt<bool>
59  "force-hardware-loop-phi", cl::Hidden, cl::init(false),
60  cl::desc("Force hardware loop counter to be updated through a phi"));
61 
62 static cl::opt<bool>
63 ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
64  cl::desc("Force allowance of nested hardware loops"));
65 
66 static cl::opt<unsigned>
67 LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
68  cl::desc("Set the loop decrement value"));
69 
70 static cl::opt<unsigned>
71 CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
72  cl::desc("Set the loop counter bitwidth"));
73 
74 static cl::opt<bool>
76  "force-hardware-loop-guard", cl::Hidden, cl::init(false),
77  cl::desc("Force generation of loop guard intrinsic"));
78 
79 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
80 
81 #ifndef NDEBUG
82 static void debugHWLoopFailure(const StringRef DebugMsg,
83  Instruction *I) {
84  dbgs() << "HWLoops: " << DebugMsg;
85  if (I)
86  dbgs() << ' ' << *I;
87  else
88  dbgs() << '.';
89  dbgs() << '\n';
90 }
91 #endif
92 
95  Value *CodeRegion = L->getHeader();
96  DebugLoc DL = L->getStartLoc();
97 
98  if (I) {
99  CodeRegion = I->getParent();
100  // If there is no debug location attached to the instruction, revert back to
101  // using the loop's.
102  if (I->getDebugLoc())
103  DL = I->getDebugLoc();
104  }
105 
106  OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
107  R << "hardware-loop not created: ";
108  return R;
109 }
110 
111 namespace {
112 
113  void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
114  OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
116  ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
117  }
118 
119  using TTI = TargetTransformInfo;
120 
121  class HardwareLoops : public FunctionPass {
122  public:
123  static char ID;
124 
125  HardwareLoops() : FunctionPass(ID) {
127  }
128 
129  bool runOnFunction(Function &F) override;
130 
131  void getAnalysisUsage(AnalysisUsage &AU) const override {
140  }
141 
142  // Try to convert the given Loop into a hardware loop.
143  bool TryConvertLoop(Loop *L);
144 
145  // Given that the target believes the loop to be profitable, try to
146  // convert it.
147  bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
148 
149  private:
150  ScalarEvolution *SE = nullptr;
151  LoopInfo *LI = nullptr;
152  const DataLayout *DL = nullptr;
153  OptimizationRemarkEmitter *ORE = nullptr;
154  const TargetTransformInfo *TTI = nullptr;
155  DominatorTree *DT = nullptr;
156  bool PreserveLCSSA = false;
157  AssumptionCache *AC = nullptr;
158  TargetLibraryInfo *LibInfo = nullptr;
159  Module *M = nullptr;
160  bool MadeChange = false;
161  };
162 
163  class HardwareLoop {
164  // Expand the trip count scev into a value that we can use.
165  Value *InitLoopCount();
166 
167  // Insert the set_loop_iteration intrinsic.
168  Value *InsertIterationSetup(Value *LoopCountInit);
169 
170  // Insert the loop_decrement intrinsic.
171  void InsertLoopDec();
172 
173  // Insert the loop_decrement_reg intrinsic.
174  Instruction *InsertLoopRegDec(Value *EltsRem);
175 
176  // If the target requires the counter value to be updated in the loop,
177  // insert a phi to hold the value. The intended purpose is for use by
178  // loop_decrement_reg.
179  PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
180 
181  // Create a new cmp, that checks the returned value of loop_decrement*,
182  // and update the exit branch to use it.
183  void UpdateBranch(Value *EltsRem);
184 
185  public:
186  HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
187  const DataLayout &DL,
189  SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
190  ExitCount(Info.ExitCount),
191  CountType(Info.CountType),
192  ExitBranch(Info.ExitBranch),
194  UsePHICounter(Info.CounterInReg),
195  UseLoopGuard(Info.PerformEntryTest) { }
196 
197  void Create();
198 
199  private:
200  ScalarEvolution &SE;
201  const DataLayout &DL;
202  OptimizationRemarkEmitter *ORE = nullptr;
203  Loop *L = nullptr;
204  Module *M = nullptr;
205  const SCEV *ExitCount = nullptr;
206  Type *CountType = nullptr;
207  BranchInst *ExitBranch = nullptr;
208  Value *LoopDecrement = nullptr;
209  bool UsePHICounter = false;
210  bool UseLoopGuard = false;
211  BasicBlock *BeginBB = nullptr;
212  };
213 }
214 
215 char HardwareLoops::ID = 0;
216 
218  if (skipFunction(F))
219  return false;
220 
221  LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
222 
223  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
224  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
225  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
226  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
227  DL = &F.getParent()->getDataLayout();
228  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
229  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
230  LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
231  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
232  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
233  M = F.getParent();
234 
235  for (Loop *L : *LI)
236  if (L->isOutermost())
237  TryConvertLoop(L);
238 
239  return MadeChange;
240 }
241 
242 // Return true if the search should stop, which will be when an inner loop is
243 // converted and the parent loop doesn't support containing a hardware loop.
244 bool HardwareLoops::TryConvertLoop(Loop *L) {
245  // Process nested loops first.
246  bool AnyChanged = false;
247  for (Loop *SL : *L)
248  AnyChanged |= TryConvertLoop(SL);
249  if (AnyChanged) {
250  reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
251  ORE, L);
252  return true; // Stop search.
253  }
254 
255  LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
256 
257  HardwareLoopInfo HWLoopInfo(L);
258  if (!HWLoopInfo.canAnalyze(*LI)) {
259  reportHWLoopFailure("cannot analyze loop, irreducible control flow",
260  "HWLoopCannotAnalyze", ORE, L);
261  return false;
262  }
263 
264  if (!ForceHardwareLoops &&
265  !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
266  reportHWLoopFailure("it's not profitable to create a hardware-loop",
267  "HWLoopNotProfitable", ORE, L);
268  return false;
269  }
270 
271  // Allow overriding of the counter width and loop decrement value.
272  if (CounterBitWidth.getNumOccurrences())
273  HWLoopInfo.CountType =
274  IntegerType::get(M->getContext(), CounterBitWidth);
275 
276  if (LoopDecrement.getNumOccurrences())
277  HWLoopInfo.LoopDecrement =
278  ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
279 
280  MadeChange |= TryConvertLoop(HWLoopInfo);
281  return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
282 }
283 
284 bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
285 
286  Loop *L = HWLoopInfo.L;
287  LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
288 
289  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
291  // TODO: there can be many reasons a loop is not considered a
292  // candidate, so we should let isHardwareLoopCandidate fill in the
293  // reason and then report a better message here.
294  reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
295  return false;
296  }
297 
298  assert(
299  (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
300  "Hardware Loop must have set exit info.");
301 
302  BasicBlock *Preheader = L->getLoopPreheader();
303 
304  // If we don't have a preheader, then insert one.
305  if (!Preheader)
306  Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
307  if (!Preheader)
308  return false;
309 
310  HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
311  HWLoop.Create();
312  ++NumHWLoops;
313  return true;
314 }
315 
316 void HardwareLoop::Create() {
317  LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
318 
319  Value *LoopCountInit = InitLoopCount();
320  if (!LoopCountInit) {
321  reportHWLoopFailure("could not safely create a loop count expression",
322  "HWLoopNotSafe", ORE, L);
323  return;
324  }
325 
326  Value *Setup = InsertIterationSetup(LoopCountInit);
327 
328  if (UsePHICounter || ForceHardwareLoopPHI) {
329  Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
330  Value *EltsRem = InsertPHICounter(Setup, LoopDec);
331  LoopDec->setOperand(0, EltsRem);
332  UpdateBranch(LoopDec);
333  } else
334  InsertLoopDec();
335 
336  // Run through the basic blocks of the loop and see if any of them have dead
337  // PHIs that can be removed.
338  for (auto I : L->blocks())
339  DeleteDeadPHIs(I);
340 }
341 
342 static bool CanGenerateTest(Loop *L, Value *Count) {
343  BasicBlock *Preheader = L->getLoopPreheader();
344  if (!Preheader->getSinglePredecessor())
345  return false;
346 
347  BasicBlock *Pred = Preheader->getSinglePredecessor();
348  if (!isa<BranchInst>(Pred->getTerminator()))
349  return false;
350 
351  auto *BI = cast<BranchInst>(Pred->getTerminator());
352  if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
353  return false;
354 
355  // Check that the icmp is checking for equality of Count and zero and that
356  // a non-zero value results in entering the loop.
357  auto ICmp = cast<ICmpInst>(BI->getCondition());
358  LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
359  if (!ICmp->isEquality())
360  return false;
361 
362  auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
363  if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
364  return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
365  return false;
366  };
367 
368  // Check if Count is a zext.
369  Value *CountBefZext =
370  isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr;
371 
372  if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
373  !IsCompareZero(ICmp, CountBefZext, 0) &&
374  !IsCompareZero(ICmp, CountBefZext, 1))
375  return false;
376 
377  unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
378  if (BI->getSuccessor(SuccIdx) != Preheader)
379  return false;
380 
381  return true;
382 }
383 
384 Value *HardwareLoop::InitLoopCount() {
385  LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
386  // Can we replace a conditional branch with an intrinsic that sets the
387  // loop counter and tests that is not zero?
388 
389  SCEVExpander SCEVE(SE, DL, "loopcnt");
390  if (!ExitCount->getType()->isPointerTy() &&
391  ExitCount->getType() != CountType)
392  ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
393 
394  ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
395 
396  // If we're trying to use the 'test and set' form of the intrinsic, we need
397  // to replace a conditional branch that is controlling entry to the loop. It
398  // is likely (guaranteed?) that the preheader has an unconditional branch to
399  // the loop header, so also check if it has a single predecessor.
400  if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
401  SE.getZero(ExitCount->getType()))) {
402  LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
403  UseLoopGuard |= ForceGuardLoopEntry;
404  } else
405  UseLoopGuard = false;
406 
408  if (UseLoopGuard && BB->getSinglePredecessor() &&
409  cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
410  BasicBlock *Predecessor = BB->getSinglePredecessor();
411  // If it's not safe to create a while loop then don't force it and create a
412  // do-while loop instead
413  if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE))
414  UseLoopGuard = false;
415  else
416  BB = Predecessor;
417  }
418 
419  if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
420  LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
421  << *ExitCount << "\n");
422  return nullptr;
423  }
424 
425  Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
426  BB->getTerminator());
427 
428  // FIXME: We've expanded Count where we hope to insert the counter setting
429  // intrinsic. But, in the case of the 'test and set' form, we may fallback to
430  // the just 'set' form and in which case the insertion block is most likely
431  // different. It means there will be instruction(s) in a block that possibly
432  // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
433  // but it's doesn't appear to work in all cases.
434 
435  UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
436  BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
437  LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
438  << " - Expanded Count in " << BB->getName() << "\n"
439  << " - Will insert set counter intrinsic into: "
440  << BeginBB->getName() << "\n");
441  return Count;
442 }
443 
444 Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
445  IRBuilder<> Builder(BeginBB->getTerminator());
446  Type *Ty = LoopCountInit->getType();
447  bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
448  Intrinsic::ID ID = UseLoopGuard
449  ? (UsePhi ? Intrinsic::test_start_loop_iterations
450  : Intrinsic::test_set_loop_iterations)
451  : (UsePhi ? Intrinsic::start_loop_iterations
452  : Intrinsic::set_loop_iterations);
453  Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
454  Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit);
455 
456  // Use the return value of the intrinsic to control the entry of the loop.
457  if (UseLoopGuard) {
458  assert((isa<BranchInst>(BeginBB->getTerminator()) &&
459  cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
460  "Expected conditional branch");
461 
462  Value *SetCount =
463  UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
464  auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
465  LoopGuard->setCondition(SetCount);
466  if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
467  LoopGuard->swapSuccessors();
468  }
469  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
470  << "\n");
471  if (UsePhi && UseLoopGuard)
472  LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
473  return !UsePhi ? LoopCountInit : LoopSetup;
474 }
475 
476 void HardwareLoop::InsertLoopDec() {
477  IRBuilder<> CondBuilder(ExitBranch);
478 
479  Function *DecFunc =
480  Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
481  LoopDecrement->getType());
482  Value *Ops[] = { LoopDecrement };
483  Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
484  Value *OldCond = ExitBranch->getCondition();
485  ExitBranch->setCondition(NewCond);
486 
487  // The false branch must exit the loop.
488  if (!L->contains(ExitBranch->getSuccessor(0)))
489  ExitBranch->swapSuccessors();
490 
491  // The old condition may be dead now, and may have even created a dead PHI
492  // (the original induction variable).
494 
495  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
496 }
497 
498 Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
499  IRBuilder<> CondBuilder(ExitBranch);
500 
501  Function *DecFunc =
502  Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
503  { EltsRem->getType() });
504  Value *Ops[] = { EltsRem, LoopDecrement };
505  Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
506 
507  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
508  return cast<Instruction>(Call);
509 }
510 
511 PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
512  BasicBlock *Preheader = L->getLoopPreheader();
513  BasicBlock *Header = L->getHeader();
514  BasicBlock *Latch = ExitBranch->getParent();
516  PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
517  Index->addIncoming(NumElts, Preheader);
518  Index->addIncoming(EltsRem, Latch);
519  LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
520  return Index;
521 }
522 
523 void HardwareLoop::UpdateBranch(Value *EltsRem) {
524  IRBuilder<> CondBuilder(ExitBranch);
525  Value *NewCond =
526  CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
527  Value *OldCond = ExitBranch->getCondition();
528  ExitBranch->setCondition(NewCond);
529 
530  // The false branch must exit the loop.
531  if (!L->contains(ExitBranch->getSuccessor(0)))
532  ExitBranch->swapSuccessors();
533 
534  // The old condition may be dead now, and may have even created a dead PHI
535  // (the original induction variable).
537 }
538 
539 INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
545 
546 FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
llvm::TargetTransformInfo::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
Definition: TargetTransformInfo.cpp:280
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:511
AssumptionCache.h
CounterBitWidth
static cl::opt< unsigned > CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), cl::desc("Set the loop counter bitwidth"))
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1379
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
ScalarEvolutionExpander.h
Scalar.h
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
Statistic.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::IRBuilder<>
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:633
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:742
Local.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:100
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:100
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
llvm::HardwareLoopInfo::L
Loop * L
Definition: TargetTransformInfo.h:98
ForceNestedLoop
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
LoopDecrement
static cl::opt< unsigned > LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), cl::desc("Set the loop decrement value"))
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:264
PassRegistry.h
createHWLoopAnalysis
static OptimizationRemarkAnalysis createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I)
Definition: HardwareLoops.cpp:94
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CommandLine.h
llvm::HardwareLoopInfo::ExitCount
const SCEV * ExitCount
Definition: TargetTransformInfo.h:101
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:142
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
LoopUtils.h
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2067
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:212
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
Utils.h
ForceHardwareLoops
static cl::opt< bool > ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), cl::desc("Force hardware loops intrinsics to be inserted"))
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
Passes.h
llvm::InsertPreheaderForLoop
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
Definition: LoopSimplify.cpp:123
debugHWLoopFailure
static void debugHWLoopFailure(const StringRef DebugMsg, Instruction *I)
Definition: HardwareLoops.cpp:82
BasicBlock.h
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1203
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::initializeHardwareLoopsPass
void initializeHardwareLoopsPass(PassRegistry &)
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
TargetPassConfig.h
ForceGuardLoopEntry
static cl::opt< bool > ForceGuardLoopEntry("force-hardware-loop-guard", cl::Hidden, cl::init(false), cl::desc("Force generation of loop guard intrinsic"))
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
CanGenerateTest
static bool CanGenerateTest(Loop *L, Value *Count)
Definition: HardwareLoops.cpp:342
HW_LOOPS_NAME
#define HW_LOOPS_NAME
Definition: HardwareLoops.cpp:49
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
DataLayout.h
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::createHardwareLoopsPass
FunctionPass * createHardwareLoopsPass()
Create Hardware Loop pass.
Definition: HardwareLoops.cpp:546
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:776
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::LCSSAID
char & LCSSAID
Definition: LCSSA.cpp:485
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::HardwareLoopInfo::ExitBlock
BasicBlock * ExitBlock
Definition: TargetTransformInfo.h:99
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:157
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:95
DEBUG_TYPE
#define DEBUG_TYPE
Definition: HardwareLoops.cpp:47
Instructions.h
llvm::isSafeToExpandAt
bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are d...
Definition: ScalarEvolutionExpander.cpp:2653
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
Dominators.h
llvm::orc::SimpleRemoteEPCOpcode::Setup
@ Setup
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:796
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2625
ForceHardwareLoopPHI
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:275
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:414
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3060
BasicBlockUtils.h
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37