LLVM  14.0.0git
MVETailPredication.cpp
Go to the documentation of this file.
1 //===- MVETailPredication.cpp - MVE Tail Predication ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Armv8.1m introduced MVE, M-Profile Vector Extension, and low-overhead
11 /// branches to help accelerate DSP applications. These two extensions,
12 /// combined with a new form of predication called tail-predication, can be used
13 /// to provide implicit vector predication within a low-overhead loop.
14 /// This is implicit because the predicate of active/inactive lanes is
15 /// calculated by hardware, and thus does not need to be explicitly passed
16 /// to vector instructions. The instructions responsible for this are the
17 /// DLSTP and WLSTP instructions, which set up a tail-predicated loop and the
18 /// total number of data elements processed by the loop. The loop-end
19 /// LETP instruction is responsible for decrementing and setting the remaining
20 /// elements to be processed and generating the mask of active lanes.
21 ///
22 /// The HardwareLoops pass inserts intrinsics identifying loops that the
23 /// backend will attempt to convert into a low-overhead loop. The vectorizer is
24 /// responsible for generating a vectorized loop in which the lanes are
25 /// predicated upon a get.active.lane.mask intrinsic. This pass looks at these
26 /// get.active.lane.mask intrinsics and attempts to convert them to VCTP
27 /// instructions. This will be picked up by the ARM Low-overhead loop pass later
28 /// in the backend, which performs the final transformation to a DLSTP or WLSTP
29 /// tail-predicated loop.
30 //
31 //===----------------------------------------------------------------------===//
32 
#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetTransformInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
53 
54 using namespace llvm;
55 
56 #define DEBUG_TYPE "mve-tail-predication"
57 #define DESC "Transform predicated vector loops to use MVE tail predication"
58 
60  "tail-predication", cl::desc("MVE tail-predication pass options"),
63  "Don't tail-predicate loops"),
65  "enabled-no-reductions",
66  "Enable tail-predication, but not for reduction loops"),
68  "enabled",
69  "Enable tail-predication, including reduction loops"),
71  "force-enabled-no-reductions",
72  "Enable tail-predication, but not for reduction loops, "
73  "and force this which might be unsafe"),
75  "force-enabled",
76  "Enable tail-predication, including reduction loops, "
77  "and force this which might be unsafe")));
78 
79 
80 namespace {
81 
82 class MVETailPredication : public LoopPass {
84  Loop *L = nullptr;
85  ScalarEvolution *SE = nullptr;
86  TargetTransformInfo *TTI = nullptr;
87  const ARMSubtarget *ST = nullptr;
88 
89 public:
90  static char ID;
91 
92  MVETailPredication() : LoopPass(ID) { }
93 
94  void getAnalysisUsage(AnalysisUsage &AU) const override {
100  AU.setPreservesCFG();
101  }
102 
103  bool runOnLoop(Loop *L, LPPassManager&) override;
104 
105 private:
106  /// Perform the relevant checks on the loop and convert active lane masks if
107  /// possible.
108  bool TryConvertActiveLaneMask(Value *TripCount);
109 
110  /// Perform several checks on the arguments of @llvm.get.active.lane.mask
111  /// intrinsic. E.g., check that the loop induction variable and the element
112  /// count are of the form we expect, and also perform overflow checks for
113  /// the new expressions that are created.
114  bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount);
115 
116  /// Insert the intrinsic to represent the effect of tail predication.
117  void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount);
118 
119  /// Rematerialize the iteration count in exit blocks, which enables
120  /// ARMLowOverheadLoops to better optimise away loop update statements inside
121  /// hardware-loops.
122  void RematerializeIterCount();
123 };
124 
125 } // end namespace
126 
127 bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
128  if (skipLoop(L) || !EnableTailPredication)
129  return false;
130 
131  MaskedInsts.clear();
132  Function &F = *L->getHeader()->getParent();
133  auto &TPC = getAnalysis<TargetPassConfig>();
134  auto &TM = TPC.getTM<TargetMachine>();
135  ST = &TM.getSubtarget<ARMSubtarget>(F);
136  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
137  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
138  this->L = L;
139 
140  // The MVE and LOB extensions are combined to enable tail-predication, but
141  // there's nothing preventing us from generating VCTP instructions for v8.1m.
142  if (!ST->hasMVEIntegerOps() || !ST->hasV8_1MMainlineOps()) {
143  LLVM_DEBUG(dbgs() << "ARM TP: Not a v8.1m.main+mve target.\n");
144  return false;
145  }
146 
147  BasicBlock *Preheader = L->getLoopPreheader();
148  if (!Preheader)
149  return false;
150 
151  auto FindLoopIterations = [](BasicBlock *BB) -> IntrinsicInst* {
152  for (auto &I : *BB) {
153  auto *Call = dyn_cast<IntrinsicInst>(&I);
154  if (!Call)
155  continue;
156 
157  Intrinsic::ID ID = Call->getIntrinsicID();
158  if (ID == Intrinsic::start_loop_iterations ||
159  ID == Intrinsic::test_start_loop_iterations)
160  return cast<IntrinsicInst>(&I);
161  }
162  return nullptr;
163  };
164 
165  // Look for the hardware loop intrinsic that sets the iteration count.
166  IntrinsicInst *Setup = FindLoopIterations(Preheader);
167 
168  // The test.set iteration could live in the pre-preheader.
169  if (!Setup) {
170  if (!Preheader->getSinglePredecessor())
171  return false;
172  Setup = FindLoopIterations(Preheader->getSinglePredecessor());
173  if (!Setup)
174  return false;
175  }
176 
177  LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n");
178 
179  bool Changed = TryConvertActiveLaneMask(Setup->getArgOperand(0));
180 
181  return Changed;
182 }
183 
184 // The active lane intrinsic has this form:
185 //
186 // @llvm.get.active.lane.mask(IV, TC)
187 //
188 // Here we perform checks that this intrinsic behaves as expected,
189 // which means:
190 //
191 // 1) Check that the TripCount (TC) belongs to this loop (originally).
192 // 2) The element count (TC) needs to be sufficiently large that the decrement
193 // of element counter doesn't overflow, which means that we need to prove:
194 // ceil(ElementCount / VectorWidth) >= TripCount
195 // by rounding up ElementCount up:
196 // ((ElementCount + (VectorWidth - 1)) / VectorWidth
197 // and evaluate if expression isKnownNonNegative:
198 // (((ElementCount + (VectorWidth - 1)) / VectorWidth) - TripCount
199 // 3) The IV must be an induction phi with an increment equal to the
200 // vector width.
201 bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
202  Value *TripCount) {
203  bool ForceTailPredication =
206 
207  Value *ElemCount = ActiveLaneMask->getOperand(1);
208  bool Changed = false;
209  if (!L->makeLoopInvariant(ElemCount, Changed))
210  return false;
211 
212  auto *EC= SE->getSCEV(ElemCount);
213  auto *TC = SE->getSCEV(TripCount);
214  int VectorWidth =
215  cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
216  if (VectorWidth != 4 && VectorWidth != 8 && VectorWidth != 16)
217  return false;
218  ConstantInt *ConstElemCount = nullptr;
219 
220  // 1) Smoke tests that the original scalar loop TripCount (TC) belongs to
221  // this loop. The scalar tripcount corresponds the number of elements
222  // processed by the loop, so we will refer to that from this point on.
223  if (!SE->isLoopInvariant(EC, L)) {
224  LLVM_DEBUG(dbgs() << "ARM TP: element count must be loop invariant.\n");
225  return false;
226  }
227 
228  if ((ConstElemCount = dyn_cast<ConstantInt>(ElemCount))) {
229  ConstantInt *TC = dyn_cast<ConstantInt>(TripCount);
230  if (!TC) {
231  LLVM_DEBUG(dbgs() << "ARM TP: Constant tripcount expected in "
232  "set.loop.iterations\n");
233  return false;
234  }
235 
236  // Calculate 2 tripcount values and check that they are consistent with
237  // each other. The TripCount for a predicated vector loop body is
238  // ceil(ElementCount/Width), or floor((ElementCount+Width-1)/Width) as we
239  // work it out here.
240  uint64_t TC1 = TC->getZExtValue();
241  uint64_t TC2 =
242  (ConstElemCount->getZExtValue() + VectorWidth - 1) / VectorWidth;
243 
244  // If the tripcount values are inconsistent, we can't insert the VCTP and
245  // trigger tail-predication; keep the intrinsic as a get.active.lane.mask
246  // and legalize this.
247  if (TC1 != TC2) {
248  LLVM_DEBUG(dbgs() << "ARM TP: inconsistent constant tripcount values: "
249  << TC1 << " from set.loop.iterations, and "
250  << TC2 << " from get.active.lane.mask\n");
251  return false;
252  }
253  } else if (!ForceTailPredication) {
254  // 2) We need to prove that the sub expression that we create in the
255  // tail-predicated loop body, which calculates the remaining elements to be
256  // processed, is non-negative, i.e. it doesn't overflow:
257  //
258  // ((ElementCount + VectorWidth - 1) / VectorWidth) - TripCount >= 0
259  //
260  // This is true if:
261  //
262  // TripCount == (ElementCount + VectorWidth - 1) / VectorWidth
263  //
264  // which what we will be using here.
265  //
266  auto *VW = SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth));
267  // ElementCount + (VW-1):
268  auto *ECPlusVWMinus1 = SE->getAddExpr(EC,
269  SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1)));
270 
271  // Ceil = ElementCount + (VW-1) / VW
272  auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1, VW);
273 
274  // Prevent unused variable warnings with TC
275  (void)TC;
276  LLVM_DEBUG(
277  dbgs() << "ARM TP: Analysing overflow behaviour for:\n";
278  dbgs() << "ARM TP: - TripCount = "; TC->dump();
279  dbgs() << "ARM TP: - ElemCount = "; EC->dump();
280  dbgs() << "ARM TP: - VecWidth = " << VectorWidth << "\n";
281  dbgs() << "ARM TP: - (ElemCount+VW-1) / VW = "; Ceil->dump();
282  );
283 
284  // As an example, almost all the tripcount expressions (produced by the
285  // vectoriser) look like this:
286  //
287  // TC = ((-4 + (4 * ((3 + %N) /u 4))<nuw>) /u 4)
288  //
289  // and "ElementCount + (VW-1) / VW":
290  //
291  // Ceil = ((3 + %N) /u 4)
292  //
293  // Check for equality of TC and Ceil by calculating SCEV expression
294  // TC - Ceil and test it for zero.
295  //
296  const SCEV *Sub =
297  SE->getMinusSCEV(SE->getBackedgeTakenCount(L),
298  SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
299  SE->getNegativeSCEV(VW)),
300  VW));
301 
302  // Use context sensitive facts about the path to the loop to refine. This
303  // comes up as the backedge taken count can incorporate context sensitive
304  // reasoning, and our RHS just above doesn't.
305  Sub = SE->applyLoopGuards(Sub, L);
306 
307  if (!Sub->isZero()) {
308  LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n");
309  return false;
310  }
311  }
312 
313  // 3) Find out if IV is an induction phi. Note that we can't use Loop
314  // helpers here to get the induction variable, because the hardware loop is
315  // no longer in loopsimplify form, and also the hwloop intrinsic uses a
316  // different counter. Using SCEV, we check that the induction is of the
317  // form i = i + 4, where the increment must be equal to the VectorWidth.
318  auto *IV = ActiveLaneMask->getOperand(0);
319  auto *IVExpr = SE->getSCEV(IV);
320  auto *AddExpr = dyn_cast<SCEVAddRecExpr>(IVExpr);
321 
322  if (!AddExpr) {
323  LLVM_DEBUG(dbgs() << "ARM TP: induction not an add expr: "; IVExpr->dump());
324  return false;
325  }
326  // Check that this AddRec is associated with this loop.
327  if (AddExpr->getLoop() != L) {
328  LLVM_DEBUG(dbgs() << "ARM TP: phi not part of this loop\n");
329  return false;
330  }
331  auto *Base = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
332  if (!Base || !Base->isZero()) {
333  LLVM_DEBUG(dbgs() << "ARM TP: induction base is not 0\n");
334  return false;
335  }
336  auto *Step = dyn_cast<SCEVConstant>(AddExpr->getOperand(1));
337  if (!Step) {
338  LLVM_DEBUG(dbgs() << "ARM TP: induction step is not a constant: ";
339  AddExpr->getOperand(1)->dump());
340  return false;
341  }
342  auto StepValue = Step->getValue()->getSExtValue();
343  if (VectorWidth == StepValue)
344  return true;
345 
346  LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue
347  << " doesn't match vector width " << VectorWidth << "\n");
348 
349  return false;
350 }
351 
352 void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
353  Value *TripCount) {
355  Module *M = L->getHeader()->getModule();
356  Type *Ty = IntegerType::get(M->getContext(), 32);
357  unsigned VectorWidth =
358  cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
359 
360  // Insert a phi to count the number of elements processed by the loop.
361  Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI());
362  PHINode *Processed = Builder.CreatePHI(Ty, 2);
363  Processed->addIncoming(ActiveLaneMask->getOperand(1), L->getLoopPreheader());
364 
365  // Replace @llvm.get.active.mask() with the ARM specific VCTP intrinic, and
366  // thus represent the effect of tail predication.
367  Builder.SetInsertPoint(ActiveLaneMask);
368  ConstantInt *Factor = ConstantInt::get(cast<IntegerType>(Ty), VectorWidth);
369 
370  Intrinsic::ID VCTPID;
371  switch (VectorWidth) {
372  default:
373  llvm_unreachable("unexpected number of lanes");
374  case 4: VCTPID = Intrinsic::arm_mve_vctp32; break;
375  case 8: VCTPID = Intrinsic::arm_mve_vctp16; break;
376  case 16: VCTPID = Intrinsic::arm_mve_vctp8; break;
377 
378  // FIXME: vctp64 currently not supported because the predicate
379  // vector wants to be <2 x i1>, but v2i1 is not a legal MVE
380  // type, so problems happen at isel time.
381  // Intrinsic::arm_mve_vctp64 exists for ACLE intrinsics
382  // purposes, but takes a v4i1 instead of a v2i1.
383  }
384  Function *VCTP = Intrinsic::getDeclaration(M, VCTPID);
385  Value *VCTPCall = Builder.CreateCall(VCTP, Processed);
386  ActiveLaneMask->replaceAllUsesWith(VCTPCall);
387 
388  // Add the incoming value to the new phi.
389  // TODO: This add likely already exists in the loop.
390  Value *Remaining = Builder.CreateSub(Processed, Factor);
391  Processed->addIncoming(Remaining, L->getLoopLatch());
392  LLVM_DEBUG(dbgs() << "ARM TP: Insert processed elements phi: "
393  << *Processed << "\n"
394  << "ARM TP: Inserted VCTP: " << *VCTPCall << "\n");
395 }
396 
397 bool MVETailPredication::TryConvertActiveLaneMask(Value *TripCount) {
398  SmallVector<IntrinsicInst *, 4> ActiveLaneMasks;
399  for (auto *BB : L->getBlocks())
400  for (auto &I : *BB)
401  if (auto *Int = dyn_cast<IntrinsicInst>(&I))
402  if (Int->getIntrinsicID() == Intrinsic::get_active_lane_mask)
403  ActiveLaneMasks.push_back(Int);
404 
405  if (ActiveLaneMasks.empty())
406  return false;
407 
408  LLVM_DEBUG(dbgs() << "ARM TP: Found predicated vector loop.\n");
409 
410  for (auto *ActiveLaneMask : ActiveLaneMasks) {
411  LLVM_DEBUG(dbgs() << "ARM TP: Found active lane mask: "
412  << *ActiveLaneMask << "\n");
413 
414  if (!IsSafeActiveMask(ActiveLaneMask, TripCount)) {
415  LLVM_DEBUG(dbgs() << "ARM TP: Not safe to insert VCTP.\n");
416  return false;
417  }
418  LLVM_DEBUG(dbgs() << "ARM TP: Safe to insert VCTP.\n");
419  InsertVCTPIntrinsic(ActiveLaneMask, TripCount);
420  }
421 
422  // Remove dead instructions and now dead phis.
423  for (auto *II : ActiveLaneMasks)
425  for (auto I : L->blocks())
426  DeleteDeadPHIs(I);
427  return true;
428 }
429 
431  return new MVETailPredication();
432 }
433 
434 char MVETailPredication::ID = 0;
435 
436 INITIALIZE_PASS_BEGIN(MVETailPredication, DEBUG_TYPE, DESC, false, false)
437 INITIALIZE_PASS_END(MVETailPredication, DEBUG_TYPE, DESC, false, false)
ARMSubtarget.h
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:511
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::TailPredication::Disabled
@ Disabled
Definition: ARMTargetTransformInfo.h:43
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1379
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
ScalarEvolutionExpander.h
llvm::ARMSubtarget
Definition: ARMSubtarget.h:46
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::Value::dump
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4807
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::IRBuilder<>
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
Local.h
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:268
llvm::TailPredication::ForceEnabled
@ ForceEnabled
Definition: ARMTargetTransformInfo.h:47
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::TailPredication::Enabled
@ Enabled
Definition: ARMTargetTransformInfo.h:45
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:142
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
LoopUtils.h
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2084
llvm::LPPassManager
Definition: LoopPass.h:75
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:148
PatternMatch.h
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:216
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:697
uint64_t
llvm::LoopPass
Definition: LoopPass.h:27
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MVETailPredication.cpp:56
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::tgtok::Int
@ Int
Definition: TGLexer.h:51
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2783
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
TargetPassConfig.h
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
IRBuilder.h
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::codeview::CompileSym2Flags::EC
@ EC
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
ARM.h
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::Loop::makeLoopInvariant
bool makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt=nullptr, MemorySSAUpdater *MSSAU=nullptr) const
If the given value is an instruction inside of the loop and it can be hoisted, do so to make it trivi...
Definition: LoopInfo.cpp:74
LoopPass.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:672
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
EnableTailPredication
cl::opt< TailPredication::Mode > EnableTailPredication("tail-predication", cl::desc("MVE tail-predication pass options"), cl::init(TailPredication::Enabled), cl::values(clEnumValN(TailPredication::Disabled, "disabled", "Don't tail-predicate loops"), clEnumValN(TailPredication::EnabledNoReductions, "enabled-no-reductions", "Enable tail-predication, but not for reduction loops"), clEnumValN(TailPredication::Enabled, "enabled", "Enable tail-predication, including reduction loops"), clEnumValN(TailPredication::ForceEnabledNoReductions, "force-enabled-no-reductions", "Enable tail-predication, but not for reduction loops, " "and force this which might be unsafe"), clEnumValN(TailPredication::ForceEnabled, "force-enabled", "Enable tail-predication, including reduction loops, " "and force this which might be unsafe")))
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
DESC
#define DESC
Definition: MVETailPredication.cpp:57
llvm::createMVETailPredicationPass
Pass * createMVETailPredicationPass()
Definition: MVETailPredication.cpp:430
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:157
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
ScalarEvolutionExpressions.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
llvm::TailPredication::EnabledNoReductions
@ EnabledNoReductions
Definition: ARMTargetTransformInfo.h:44
llvm::orc::SimpleRemoteEPCOpcode::Setup
@ Setup
TargetTransformInfo.h
ARMTargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2633
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:313
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
BasicBlockUtils.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::TailPredication::ForceEnabledNoReductions
@ ForceEnabledNoReductions
Definition: ARMTargetTransformInfo.h:46