LLVM  16.0.0git
PPCLoopInstrFormPrep.cpp
Go to the documentation of this file.
1 //===------ PPCLoopInstrFormPrep.cpp - Loop Instr Form Prep Pass ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a pass to prepare loops for ppc preferred addressing
10 // modes, leveraging different instruction form. (eg: DS/DQ form, D/DS form with
11 // update)
12 // Additional PHIs are created for loop induction variables used by load/store
13 // instructions so that preferred addressing modes can be used.
14 //
15 // 1: DS/DQ form preparation, prepare the load/store instructions so that they
16 // can satisfy the DS/DQ form displacement requirements.
17 // Generically, this means transforming loops like this:
18 // for (int i = 0; i < n; ++i) {
19 // unsigned long x1 = *(unsigned long *)(p + i + 5);
20 // unsigned long x2 = *(unsigned long *)(p + i + 9);
21 // }
22 //
23 // to look like this:
24 //
25 // unsigned NewP = p + 5;
26 // for (int i = 0; i < n; ++i) {
27 // unsigned long x1 = *(unsigned long *)(i + NewP);
28 // unsigned long x2 = *(unsigned long *)(i + NewP + 4);
29 // }
30 //
31 // 2: D/DS form with update preparation, prepare the load/store instructions so
32 // that we can use update form to do pre-increment.
33 // Generically, this means transforming loops like this:
34 // for (int i = 0; i < n; ++i)
35 // array[i] = c;
36 //
37 // to look like this:
38 //
39 // T *p = array[-1];
40 // for (int i = 0; i < n; ++i)
41 // *++p = c;
42 //
43 // 3: common multiple chains for the load/stores with same offsets in the loop,
44 // so that we can reuse the offsets and reduce the register pressure in the
45 // loop. This transformation can also increase the loop ILP as now each chain
46 // uses its own loop induction add/addi. But this will increase the number of
47 // add/addi in the loop.
48 //
49 // Generically, this means transforming loops like this:
50 //
51 // char *p;
52 // A1 = p + base1
53 // A2 = p + base1 + offset
54 // B1 = p + base2
55 // B2 = p + base2 + offset
56 //
57 // for (int i = 0; i < n; i++)
58 // unsigned long x1 = *(unsigned long *)(A1 + i);
59 // unsigned long x2 = *(unsigned long *)(A2 + i)
60 // unsigned long x3 = *(unsigned long *)(B1 + i);
61 // unsigned long x4 = *(unsigned long *)(B2 + i);
62 // }
63 //
64 // to look like this:
65 //
66 // A1_new = p + base1 // chain 1
67 // B1_new = p + base2 // chain 2, now inside the loop, common offset is
68 // // reused.
69 //
70 // for (long long i = 0; i < n; i+=count) {
71 // unsigned long x1 = *(unsigned long *)(A1_new + i);
72 // unsigned long x2 = *(unsigned long *)((A1_new + i) + offset);
73 // unsigned long x3 = *(unsigned long *)(B1_new + i);
74 // unsigned long x4 = *(unsigned long *)((B1_new + i) + offset);
75 // }
76 //===----------------------------------------------------------------------===//
77 
78 #include "PPC.h"
79 #include "PPCSubtarget.h"
80 #include "PPCTargetMachine.h"
82 #include "llvm/ADT/SmallPtrSet.h"
83 #include "llvm/ADT/SmallSet.h"
84 #include "llvm/ADT/SmallVector.h"
85 #include "llvm/ADT/Statistic.h"
86 #include "llvm/Analysis/LoopInfo.h"
89 #include "llvm/IR/BasicBlock.h"
90 #include "llvm/IR/CFG.h"
91 #include "llvm/IR/Dominators.h"
92 #include "llvm/IR/Instruction.h"
93 #include "llvm/IR/Instructions.h"
94 #include "llvm/IR/IntrinsicInst.h"
95 #include "llvm/IR/IntrinsicsPowerPC.h"
96 #include "llvm/IR/Module.h"
97 #include "llvm/IR/Type.h"
98 #include "llvm/IR/Value.h"
99 #include "llvm/InitializePasses.h"
100 #include "llvm/Pass.h"
101 #include "llvm/Support/Casting.h"
103 #include "llvm/Support/Debug.h"
104 #include "llvm/Transforms/Scalar.h"
105 #include "llvm/Transforms/Utils.h"
110 #include <cassert>
111 #include <cmath>
112 #include <iterator>
113 #include <utility>
114 
115 #define DEBUG_TYPE "ppc-loop-instr-form-prep"
116 
117 using namespace llvm;
118 
119 static cl::opt<unsigned>
120  MaxVarsPrep("ppc-formprep-max-vars", cl::Hidden, cl::init(24),
121  cl::desc("Potential common base number threshold per function "
122  "for PPC loop prep"));
123 
124 static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update",
125  cl::init(true), cl::Hidden,
126  cl::desc("prefer update form when ds form is also a update form"));
127 
129  "ppc-formprep-update-nonconst-inc", cl::init(false), cl::Hidden,
130  cl::desc("prepare update form when the load/store increment is a loop "
131  "invariant non-const value."));
132 
134  "ppc-formprep-chain-commoning", cl::init(false), cl::Hidden,
135  cl::desc("Enable chain commoning in PPC loop prepare pass."));
136 
137 // Sum of following 3 per loop thresholds for all loops can not be larger
138 // than MaxVarsPrep.
139 // now the thresholds for each kind prep are exterimental values on Power9.
140 static cl::opt<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars",
141  cl::Hidden, cl::init(3),
142  cl::desc("Potential PHI threshold per loop for PPC loop prep of update "
143  "form"));
144 
145 static cl::opt<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars",
146  cl::Hidden, cl::init(3),
147  cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form"));
148 
149 static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars",
150  cl::Hidden, cl::init(8),
151  cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"));
152 
153 // Commoning chain will reduce the register pressure, so we don't consider about
154 // the PHI nodes number.
155 // But commoning chain will increase the addi/add number in the loop and also
156 // increase loop ILP. Maximum chain number should be same with hardware
157 // IssueWidth, because we won't benefit from ILP if the parallel chains number
158 // is bigger than IssueWidth. We assume there are 2 chains in one bucket, so
159 // there would be 4 buckets at most on P9(IssueWidth is 8).
161  "ppc-chaincommon-max-vars", cl::Hidden, cl::init(4),
162  cl::desc("Bucket number per loop for PPC loop chain common"));
163 
164 // If would not be profitable if the common base has only one load/store, ISEL
165 // should already be able to choose best load/store form based on offset for
166 // single load/store. Set minimal profitable value default to 2 and make it as
167 // an option.
168 static cl::opt<unsigned> DispFormPrepMinThreshold("ppc-dispprep-min-threshold",
169  cl::Hidden, cl::init(2),
170  cl::desc("Minimal common base load/store instructions triggering DS/DQ form "
171  "preparation"));
172 
174  "ppc-chaincommon-min-threshold", cl::Hidden, cl::init(4),
175  cl::desc("Minimal common base load/store instructions triggering chain "
176  "commoning preparation. Must be not smaller than 4"));
177 
178 STATISTIC(PHINodeAlreadyExistsUpdate, "PHI node already in pre-increment form");
179 STATISTIC(PHINodeAlreadyExistsDS, "PHI node already in DS form");
180 STATISTIC(PHINodeAlreadyExistsDQ, "PHI node already in DQ form");
181 STATISTIC(DSFormChainRewritten, "Num of DS form chain rewritten");
182 STATISTIC(DQFormChainRewritten, "Num of DQ form chain rewritten");
183 STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten");
184 STATISTIC(ChainCommoningRewritten, "Num of commoning chains");
185 
186 namespace {
187  struct BucketElement {
188  BucketElement(const SCEV *O, Instruction *I) : Offset(O), Instr(I) {}
189  BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
190 
191  const SCEV *Offset;
192  Instruction *Instr;
193  };
194 
195  struct Bucket {
196  Bucket(const SCEV *B, Instruction *I)
197  : BaseSCEV(B), Elements(1, BucketElement(I)) {
198  ChainSize = 0;
199  }
200 
201  // The base of the whole bucket.
202  const SCEV *BaseSCEV;
203 
204  // All elements in the bucket. In the bucket, the element with the BaseSCEV
205  // has no offset and all other elements are stored as offsets to the
206  // BaseSCEV.
208 
209  // The potential chains size. This is used for chain commoning only.
210  unsigned ChainSize;
211 
212  // The base for each potential chain. This is used for chain commoning only.
214  };
215 
216  // "UpdateForm" is not a real PPC instruction form, it stands for dform
217  // load/store with update like ldu/stdu, or Prefetch intrinsic.
218  // For DS form instructions, their displacements must be multiple of 4.
219  // For DQ form instructions, their displacements must be multiple of 16.
220  enum PrepForm { UpdateForm = 1, DSForm = 4, DQForm = 16, ChainCommoning };
221 
222  class PPCLoopInstrFormPrep : public FunctionPass {
223  public:
224  static char ID; // Pass ID, replacement for typeid
225 
226  PPCLoopInstrFormPrep() : FunctionPass(ID) {
228  }
229 
230  PPCLoopInstrFormPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
232  }
233 
234  void getAnalysisUsage(AnalysisUsage &AU) const override {
239  }
240 
241  bool runOnFunction(Function &F) override;
242 
243  private:
244  PPCTargetMachine *TM = nullptr;
245  const PPCSubtarget *ST;
246  DominatorTree *DT;
247  LoopInfo *LI;
248  ScalarEvolution *SE;
249  bool PreserveLCSSA;
250  bool HasCandidateForPrepare;
251 
252  /// Successful preparation number for Update/DS/DQ form in all inner most
253  /// loops. One successful preparation will put one common base out of loop,
254  /// this may leads to register presure like LICM does.
255  /// Make sure total preparation number can be controlled by option.
256  unsigned SuccPrepCount;
257 
258  bool runOnLoop(Loop *L);
259 
260  /// Check if required PHI node is already exist in Loop \p L.
261  bool alreadyPrepared(Loop *L, Instruction *MemI,
262  const SCEV *BasePtrStartSCEV,
263  const SCEV *BasePtrIncSCEV, PrepForm Form);
264 
265  /// Get the value which defines the increment SCEV \p BasePtrIncSCEV.
266  Value *getNodeForInc(Loop *L, Instruction *MemI,
267  const SCEV *BasePtrIncSCEV);
268 
269  /// Common chains to reuse offsets for a loop to reduce register pressure.
270  bool chainCommoning(Loop *L, SmallVector<Bucket, 16> &Buckets);
271 
272  /// Find out the potential commoning chains and their bases.
273  bool prepareBasesForCommoningChains(Bucket &BucketChain);
274 
275  /// Rewrite load/store according to the common chains.
276  bool
277  rewriteLoadStoresForCommoningChains(Loop *L, Bucket &Bucket,
278  SmallSet<BasicBlock *, 16> &BBChanged);
279 
280  /// Collect condition matched(\p isValidCandidate() returns true)
281  /// candidates in Loop \p L.
282  SmallVector<Bucket, 16> collectCandidates(
283  Loop *L,
284  std::function<bool(const Instruction *, Value *, const Type *)>
285  isValidCandidate,
286  std::function<bool(const SCEV *)> isValidDiff,
287  unsigned MaxCandidateNum);
288 
289  /// Add a candidate to candidates \p Buckets if diff between candidate and
290  /// one base in \p Buckets matches \p isValidDiff.
291  void addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
292  SmallVector<Bucket, 16> &Buckets,
293  std::function<bool(const SCEV *)> isValidDiff,
294  unsigned MaxCandidateNum);
295 
296  /// Prepare all candidates in \p Buckets for update form.
297  bool updateFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets);
298 
299  /// Prepare all candidates in \p Buckets for displacement form, now for
300  /// ds/dq.
301  bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets, PrepForm Form);
302 
303  /// Prepare for one chain \p BucketChain, find the best base element and
304  /// update all other elements in \p BucketChain accordingly.
305  /// \p Form is used to find the best base element.
306  /// If success, best base element must be stored as the first element of
307  /// \p BucketChain.
308  /// Return false if no base element found, otherwise return true.
309  bool prepareBaseForDispFormChain(Bucket &BucketChain, PrepForm Form);
310 
311  /// Prepare for one chain \p BucketChain, find the best base element and
312  /// update all other elements in \p BucketChain accordingly.
313  /// If success, best base element must be stored as the first element of
314  /// \p BucketChain.
315  /// Return false if no base element found, otherwise return true.
316  bool prepareBaseForUpdateFormChain(Bucket &BucketChain);
317 
318  /// Rewrite load/store instructions in \p BucketChain according to
319  /// preparation.
320  bool rewriteLoadStores(Loop *L, Bucket &BucketChain,
321  SmallSet<BasicBlock *, 16> &BBChanged,
322  PrepForm Form);
323 
324  /// Rewrite for the base load/store of a chain.
325  std::pair<Instruction *, Instruction *>
326  rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
327  Instruction *BaseMemI, bool CanPreInc, PrepForm Form,
328  SCEVExpander &SCEVE, SmallPtrSet<Value *, 16> &DeletedPtrs);
329 
330  /// Rewrite for the other load/stores of a chain according to the new \p
331  /// Base.
332  Instruction *
333  rewriteForBucketElement(std::pair<Instruction *, Instruction *> Base,
334  const BucketElement &Element, Value *OffToBase,
335  SmallPtrSet<Value *, 16> &DeletedPtrs);
336  };
337 
338 } // end anonymous namespace
339 
340 char PPCLoopInstrFormPrep::ID = 0;
341 static const char *name = "Prepare loop for ppc preferred instruction forms";
342 INITIALIZE_PASS_BEGIN(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
345 INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
346 
347 static constexpr StringRef PHINodeNameSuffix = ".phi";
348 static constexpr StringRef CastNodeNameSuffix = ".cast";
349 static constexpr StringRef GEPNodeIncNameSuffix = ".inc";
350 static constexpr StringRef GEPNodeOffNameSuffix = ".off";
351 
353  return new PPCLoopInstrFormPrep(TM);
354 }
355 
356 static bool IsPtrInBounds(Value *BasePtr) {
357  Value *StrippedBasePtr = BasePtr;
358  while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))
359  StrippedBasePtr = BC->getOperand(0);
360  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr))
361  return GEP->isInBounds();
362 
363  return false;
364 }
365 
366 static std::string getInstrName(const Value *I, StringRef Suffix) {
367  assert(I && "Invalid paramater!");
368  if (I->hasName())
369  return (I->getName() + Suffix).str();
370  else
371  return "";
372 }
373 
375  Type **PtrElementType = nullptr) {
376 
377  Value *PtrValue = nullptr;
378  Type *PointerElementType = nullptr;
379 
380  if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
381  PtrValue = LMemI->getPointerOperand();
382  PointerElementType = LMemI->getType();
383  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
384  PtrValue = SMemI->getPointerOperand();
385  PointerElementType = SMemI->getValueOperand()->getType();
386  } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
387  PointerElementType = Type::getInt8Ty(MemI->getContext());
388  if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
389  IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {
390  PtrValue = IMemI->getArgOperand(0);
391  } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {
392  PtrValue = IMemI->getArgOperand(1);
393  }
394  }
395  /*Get ElementType if PtrElementType is not null.*/
396  if (PtrElementType)
397  *PtrElementType = PointerElementType;
398 
399  return PtrValue;
400 }
401 
403  if (skipFunction(F))
404  return false;
405 
406  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
407  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
408  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
409  DT = DTWP ? &DTWP->getDomTree() : nullptr;
410  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
411  ST = TM ? TM->getSubtargetImpl(F) : nullptr;
412  SuccPrepCount = 0;
413 
414  bool MadeChange = false;
415 
416  for (Loop *I : *LI)
417  for (Loop *L : depth_first(I))
418  MadeChange |= runOnLoop(L);
419 
420  return MadeChange;
421 }
422 
423 // Finding the minimal(chain_number + reusable_offset_number) is a complicated
424 // algorithmic problem.
425 // For now, the algorithm used here is simply adjusted to handle the case for
426 // manually unrolling cases.
427 // FIXME: use a more powerful algorithm to find minimal sum of chain_number and
428 // reusable_offset_number for one base with multiple offsets.
429 bool PPCLoopInstrFormPrep::prepareBasesForCommoningChains(Bucket &CBucket) {
430  // The minimal size for profitable chain commoning:
431  // A1 = base + offset1
432  // A2 = base + offset2 (offset2 - offset1 = X)
433  // A3 = base + offset3
434  // A4 = base + offset4 (offset4 - offset3 = X)
435  // ======>
436  // base1 = base + offset1
437  // base2 = base + offset3
438  // A1 = base1
439  // A2 = base1 + X
440  // A3 = base2
441  // A4 = base2 + X
442  //
443  // There is benefit because of reuse of offest 'X'.
444 
446  "Thredhold can not be smaller than 4!\n");
447  if (CBucket.Elements.size() < ChainCommonPrepMinThreshold)
448  return false;
449 
450  // We simply select the FirstOffset as the first reusable offset between each
451  // chain element 1 and element 0.
452  const SCEV *FirstOffset = CBucket.Elements[1].Offset;
453 
454  // Figure out how many times above FirstOffset is used in the chain.
455  // For a success commoning chain candidate, offset difference between each
456  // chain element 1 and element 0 must be also FirstOffset.
457  unsigned FirstOffsetReusedCount = 1;
458 
459  // Figure out how many times above FirstOffset is used in the first chain.
460  // Chain number is FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain
461  unsigned FirstOffsetReusedCountInFirstChain = 1;
462 
463  unsigned EleNum = CBucket.Elements.size();
464  bool SawChainSeparater = false;
465  for (unsigned j = 2; j != EleNum; ++j) {
466  if (SE->getMinusSCEV(CBucket.Elements[j].Offset,
467  CBucket.Elements[j - 1].Offset) == FirstOffset) {
468  if (!SawChainSeparater)
469  FirstOffsetReusedCountInFirstChain++;
470  FirstOffsetReusedCount++;
471  } else
472  // For now, if we meet any offset which is not FirstOffset, we assume we
473  // find a new Chain.
474  // This makes us miss some opportunities.
475  // For example, we can common:
476  //
477  // {OffsetA, Offset A, OffsetB, OffsetA, OffsetA, OffsetB}
478  //
479  // as two chains:
480  // {{OffsetA, Offset A, OffsetB}, {OffsetA, OffsetA, OffsetB}}
481  // FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 2
482  //
483  // But we fail to common:
484  //
485  // {OffsetA, OffsetB, OffsetA, OffsetA, OffsetB, OffsetA}
486  // FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 1
487 
488  SawChainSeparater = true;
489  }
490 
491  // FirstOffset is not reused, skip this bucket.
492  if (FirstOffsetReusedCount == 1)
493  return false;
494 
495  unsigned ChainNum =
496  FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain;
497 
498  // All elements are increased by FirstOffset.
499  // The number of chains should be sqrt(EleNum).
500  if (!SawChainSeparater)
501  ChainNum = (unsigned)sqrt((double)EleNum);
502 
503  CBucket.ChainSize = (unsigned)(EleNum / ChainNum);
504 
505  // If this is not a perfect chain(eg: not all elements can be put inside
506  // commoning chains.), skip now.
507  if (CBucket.ChainSize * ChainNum != EleNum)
508  return false;
509 
510  if (SawChainSeparater) {
511  // Check that the offset seqs are the same for all chains.
512  for (unsigned i = 1; i < CBucket.ChainSize; i++)
513  for (unsigned j = 1; j < ChainNum; j++)
514  if (CBucket.Elements[i].Offset !=
515  SE->getMinusSCEV(CBucket.Elements[i + j * CBucket.ChainSize].Offset,
516  CBucket.Elements[j * CBucket.ChainSize].Offset))
517  return false;
518  }
519 
520  for (unsigned i = 0; i < ChainNum; i++)
521  CBucket.ChainBases.push_back(CBucket.Elements[i * CBucket.ChainSize]);
522 
523  LLVM_DEBUG(dbgs() << "Bucket has " << ChainNum << " chains.\n");
524 
525  return true;
526 }
527 
528 bool PPCLoopInstrFormPrep::chainCommoning(Loop *L,
529  SmallVector<Bucket, 16> &Buckets) {
530  bool MadeChange = false;
531 
532  if (Buckets.empty())
533  return MadeChange;
534 
535  SmallSet<BasicBlock *, 16> BBChanged;
536 
537  for (auto &Bucket : Buckets) {
538  if (prepareBasesForCommoningChains(Bucket))
539  MadeChange |= rewriteLoadStoresForCommoningChains(L, Bucket, BBChanged);
540  }
541 
542  if (MadeChange)
543  for (auto *BB : BBChanged)
545  return MadeChange;
546 }
547 
548 bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(
549  Loop *L, Bucket &Bucket, SmallSet<BasicBlock *, 16> &BBChanged) {
550  bool MadeChange = false;
551 
552  assert(Bucket.Elements.size() ==
553  Bucket.ChainBases.size() * Bucket.ChainSize &&
554  "invalid bucket for chain commoning!\n");
555  SmallPtrSet<Value *, 16> DeletedPtrs;
556 
557  BasicBlock *Header = L->getHeader();
558  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
559 
560  SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(),
561  "loopprepare-chaincommon");
562 
563  for (unsigned ChainIdx = 0; ChainIdx < Bucket.ChainBases.size(); ++ChainIdx) {
564  unsigned BaseElemIdx = Bucket.ChainSize * ChainIdx;
565  const SCEV *BaseSCEV =
566  ChainIdx ? SE->getAddExpr(Bucket.BaseSCEV,
567  Bucket.Elements[BaseElemIdx].Offset)
568  : Bucket.BaseSCEV;
569  const SCEVAddRecExpr *BasePtrSCEV = cast<SCEVAddRecExpr>(BaseSCEV);
570 
571  // Make sure the base is able to expand.
572  if (!SCEVE.isSafeToExpand(BasePtrSCEV->getStart()))
573  return MadeChange;
574 
575  assert(BasePtrSCEV->isAffine() &&
576  "Invalid SCEV type for the base ptr for a candidate chain!\n");
577 
578  std::pair<Instruction *, Instruction *> Base = rewriteForBase(
579  L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr,
580  false /* CanPreInc */, ChainCommoning, SCEVE, DeletedPtrs);
581 
582  if (!Base.first || !Base.second)
583  return MadeChange;
584 
585  // Keep track of the replacement pointer values we've inserted so that we
586  // don't generate more pointer values than necessary.
587  SmallPtrSet<Value *, 16> NewPtrs;
588  NewPtrs.insert(Base.first);
589 
590  for (unsigned Idx = BaseElemIdx + 1; Idx < BaseElemIdx + Bucket.ChainSize;
591  ++Idx) {
592  BucketElement &I = Bucket.Elements[Idx];
594  assert(Ptr && "No pointer operand");
595  if (NewPtrs.count(Ptr))
596  continue;
597 
598  const SCEV *OffsetSCEV =
599  BaseElemIdx ? SE->getMinusSCEV(Bucket.Elements[Idx].Offset,
600  Bucket.Elements[BaseElemIdx].Offset)
601  : Bucket.Elements[Idx].Offset;
602 
603  // Make sure offset is able to expand. Only need to check one time as the
604  // offsets are reused between different chains.
605  if (!BaseElemIdx)
606  if (!SCEVE.isSafeToExpand(OffsetSCEV))
607  return false;
608 
609  Value *OffsetValue = SCEVE.expandCodeFor(
610  OffsetSCEV, OffsetSCEV->getType(), LoopPredecessor->getTerminator());
611 
612  Instruction *NewPtr = rewriteForBucketElement(Base, Bucket.Elements[Idx],
613  OffsetValue, DeletedPtrs);
614 
615  assert(NewPtr && "Wrong rewrite!\n");
616  NewPtrs.insert(NewPtr);
617  }
618 
619  ++ChainCommoningRewritten;
620  }
621 
622  // Clear the rewriter cache, because values that are in the rewriter's cache
623  // can be deleted below, causing the AssertingVH in the cache to trigger.
624  SCEVE.clear();
625 
626  for (auto *Ptr : DeletedPtrs) {
627  if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
628  BBChanged.insert(IDel->getParent());
630  }
631 
632  MadeChange = true;
633  return MadeChange;
634 }
635 
636 // Rewrite the new base according to BasePtrSCEV.
637 // bb.loop.preheader:
638 // %newstart = ...
639 // bb.loop.body:
640 // %phinode = phi [ %newstart, %bb.loop.preheader ], [ %add, %bb.loop.body ]
641 // ...
642 // %add = getelementptr %phinode, %inc
643 //
644 // First returned instruciton is %phinode (or a type cast to %phinode), caller
645 // needs this value to rewrite other load/stores in the same chain.
646 // Second returned instruction is %add, caller needs this value to rewrite other
647 // load/stores in the same chain.
648 std::pair<Instruction *, Instruction *>
649 PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
650  Instruction *BaseMemI, bool CanPreInc,
651  PrepForm Form, SCEVExpander &SCEVE,
652  SmallPtrSet<Value *, 16> &DeletedPtrs) {
653 
654  LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
655 
656  assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?");
657 
659  assert(BasePtr && "No pointer operand");
660 
661  Type *I8Ty = Type::getInt8Ty(BaseMemI->getParent()->getContext());
662  Type *I8PtrTy =
663  Type::getInt8PtrTy(BaseMemI->getParent()->getContext(),
664  BasePtr->getType()->getPointerAddressSpace());
665 
666  bool IsConstantInc = false;
667  const SCEV *BasePtrIncSCEV = BasePtrSCEV->getStepRecurrence(*SE);
668  Value *IncNode = getNodeForInc(L, BaseMemI, BasePtrIncSCEV);
669 
670  const SCEVConstant *BasePtrIncConstantSCEV =
671  dyn_cast<SCEVConstant>(BasePtrIncSCEV);
672  if (BasePtrIncConstantSCEV)
673  IsConstantInc = true;
674 
675  // No valid representation for the increment.
676  if (!IncNode) {
677  LLVM_DEBUG(dbgs() << "Loop Increasement can not be represented!\n");
678  return std::make_pair(nullptr, nullptr);
679  }
680 
681  if (Form == UpdateForm && !IsConstantInc && !EnableUpdateFormForNonConstInc) {
682  LLVM_DEBUG(
683  dbgs()
684  << "Update form prepare for non-const increment is not enabled!\n");
685  return std::make_pair(nullptr, nullptr);
686  }
687 
688  const SCEV *BasePtrStartSCEV = nullptr;
689  if (CanPreInc) {
690  assert(SE->isLoopInvariant(BasePtrIncSCEV, L) &&
691  "Increment is not loop invariant!\n");
692  BasePtrStartSCEV = SE->getMinusSCEV(BasePtrSCEV->getStart(),
693  IsConstantInc ? BasePtrIncConstantSCEV
694  : BasePtrIncSCEV);
695  } else
696  BasePtrStartSCEV = BasePtrSCEV->getStart();
697 
698  if (alreadyPrepared(L, BaseMemI, BasePtrStartSCEV, BasePtrIncSCEV, Form)) {
699  LLVM_DEBUG(dbgs() << "Instruction form is already prepared!\n");
700  return std::make_pair(nullptr, nullptr);
701  }
702 
703  LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
704 
705  BasicBlock *Header = L->getHeader();
706  unsigned HeaderLoopPredCount = pred_size(Header);
707  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
708 
709  PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
710  getInstrName(BaseMemI, PHINodeNameSuffix),
711  Header->getFirstNonPHI());
712 
713  Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
714  LoopPredecessor->getTerminator());
715 
716  // Note that LoopPredecessor might occur in the predecessor list multiple
717  // times, and we need to add it the right number of times.
718  for (auto *PI : predecessors(Header)) {
719  if (PI != LoopPredecessor)
720  continue;
721 
722  NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
723  }
724 
725  Instruction *PtrInc = nullptr;
726  Instruction *NewBasePtr = nullptr;
727  if (CanPreInc) {
728  Instruction *InsPoint = &*Header->getFirstInsertionPt();
729  PtrInc = GetElementPtrInst::Create(
730  I8Ty, NewPHI, IncNode, getInstrName(BaseMemI, GEPNodeIncNameSuffix),
731  InsPoint);
732  cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
733  for (auto *PI : predecessors(Header)) {
734  if (PI == LoopPredecessor)
735  continue;
736 
737  NewPHI->addIncoming(PtrInc, PI);
738  }
739  if (PtrInc->getType() != BasePtr->getType())
740  NewBasePtr =
741  new BitCastInst(PtrInc, BasePtr->getType(),
742  getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
743  else
744  NewBasePtr = PtrInc;
745  } else {
746  // Note that LoopPredecessor might occur in the predecessor list multiple
747  // times, and we need to make sure no more incoming value for them in PHI.
748  for (auto *PI : predecessors(Header)) {
749  if (PI == LoopPredecessor)
750  continue;
751 
752  // For the latch predecessor, we need to insert a GEP just before the
753  // terminator to increase the address.
754  BasicBlock *BB = PI;
755  Instruction *InsPoint = BB->getTerminator();
756  PtrInc = GetElementPtrInst::Create(
757  I8Ty, NewPHI, IncNode, getInstrName(BaseMemI, GEPNodeIncNameSuffix),
758  InsPoint);
759  cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
760 
761  NewPHI->addIncoming(PtrInc, PI);
762  }
763  PtrInc = NewPHI;
764  if (NewPHI->getType() != BasePtr->getType())
765  NewBasePtr = new BitCastInst(NewPHI, BasePtr->getType(),
767  &*Header->getFirstInsertionPt());
768  else
769  NewBasePtr = NewPHI;
770  }
771 
772  BasePtr->replaceAllUsesWith(NewBasePtr);
773 
774  DeletedPtrs.insert(BasePtr);
775 
776  return std::make_pair(NewBasePtr, PtrInc);
777 }
778 
779 Instruction *PPCLoopInstrFormPrep::rewriteForBucketElement(
780  std::pair<Instruction *, Instruction *> Base, const BucketElement &Element,
781  Value *OffToBase, SmallPtrSet<Value *, 16> &DeletedPtrs) {
782  Instruction *NewBasePtr = Base.first;
783  Instruction *PtrInc = Base.second;
784  assert((NewBasePtr && PtrInc) && "base does not exist!\n");
785 
786  Type *I8Ty = Type::getInt8Ty(PtrInc->getParent()->getContext());
787 
788  Value *Ptr = getPointerOperandAndType(Element.Instr);
789  assert(Ptr && "No pointer operand");
790 
791  Instruction *RealNewPtr;
792  if (!Element.Offset ||
793  (isa<SCEVConstant>(Element.Offset) &&
794  cast<SCEVConstant>(Element.Offset)->getValue()->isZero())) {
795  RealNewPtr = NewBasePtr;
796  } else {
797  Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
798  if (PtrIP && isa<Instruction>(NewBasePtr) &&
799  cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
800  PtrIP = nullptr;
801  else if (PtrIP && isa<PHINode>(PtrIP))
802  PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
803  else if (!PtrIP)
804  PtrIP = Element.Instr;
805 
806  assert(OffToBase && "There should be an offset for non base element!\n");
808  I8Ty, PtrInc, OffToBase,
809  getInstrName(Element.Instr, GEPNodeOffNameSuffix), PtrIP);
810  if (!PtrIP)
811  NewPtr->insertAfter(cast<Instruction>(PtrInc));
812  NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
813  RealNewPtr = NewPtr;
814  }
815 
816  Instruction *ReplNewPtr;
817  if (Ptr->getType() != RealNewPtr->getType()) {
818  ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
820  ReplNewPtr->insertAfter(RealNewPtr);
821  } else
822  ReplNewPtr = RealNewPtr;
823 
824  Ptr->replaceAllUsesWith(ReplNewPtr);
825  DeletedPtrs.insert(Ptr);
826 
827  return ReplNewPtr;
828 }
829 
830 void PPCLoopInstrFormPrep::addOneCandidate(
831  Instruction *MemI, const SCEV *LSCEV, SmallVector<Bucket, 16> &Buckets,
832  std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) {
833  assert((MemI && getPointerOperandAndType(MemI)) &&
834  "Candidate should be a memory instruction.");
835  assert(LSCEV && "Invalid SCEV for Ptr value.");
836 
837  bool FoundBucket = false;
838  for (auto &B : Buckets) {
839  if (cast<SCEVAddRecExpr>(B.BaseSCEV)->getStepRecurrence(*SE) !=
840  cast<SCEVAddRecExpr>(LSCEV)->getStepRecurrence(*SE))
841  continue;
842  const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
843  if (isValidDiff(Diff)) {
844  B.Elements.push_back(BucketElement(Diff, MemI));
845  FoundBucket = true;
846  break;
847  }
848  }
849 
850  if (!FoundBucket) {
851  if (Buckets.size() == MaxCandidateNum) {
852  LLVM_DEBUG(dbgs() << "Can not prepare more chains, reach maximum limit "
853  << MaxCandidateNum << "\n");
854  return;
855  }
856  Buckets.push_back(Bucket(LSCEV, MemI));
857  }
858 }
859 
860 SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
861  Loop *L,
862  std::function<bool(const Instruction *, Value *, const Type *)>
863  isValidCandidate,
864  std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) {
865  SmallVector<Bucket, 16> Buckets;
866 
867  for (const auto &BB : L->blocks())
868  for (auto &J : *BB) {
869  Value *PtrValue = nullptr;
870  Type *PointerElementType = nullptr;
871  PtrValue = getPointerOperandAndType(&J, &PointerElementType);
872 
873  if (!PtrValue)
874  continue;
875 
876  if (PtrValue->getType()->getPointerAddressSpace())
877  continue;
878 
879  if (L->isLoopInvariant(PtrValue))
880  continue;
881 
882  const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
883  const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);
884  if (!LARSCEV || LARSCEV->getLoop() != L)
885  continue;
886 
887  // Mark that we have candidates for preparing.
888  HasCandidateForPrepare = true;
889 
890  if (isValidCandidate(&J, PtrValue, PointerElementType))
891  addOneCandidate(&J, LSCEV, Buckets, isValidDiff, MaxCandidateNum);
892  }
893  return Buckets;
894 }
895 
896 bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
897  PrepForm Form) {
898  // RemainderOffsetInfo details:
899  // key: value of (Offset urem DispConstraint). For DSForm, it can
900  // be [0, 4).
901  // first of pair: the index of first BucketElement whose remainder is equal
902  // to key. For key 0, this value must be 0.
903  // second of pair: number of load/stores with the same remainder.
905 
906  for (unsigned j = 0, je = BucketChain.Elements.size(); j != je; ++j) {
907  if (!BucketChain.Elements[j].Offset)
908  RemainderOffsetInfo[0] = std::make_pair(0, 1);
909  else {
910  unsigned Remainder = cast<SCEVConstant>(BucketChain.Elements[j].Offset)
911  ->getAPInt()
912  .urem(Form);
913  if (RemainderOffsetInfo.find(Remainder) == RemainderOffsetInfo.end())
914  RemainderOffsetInfo[Remainder] = std::make_pair(j, 1);
915  else
916  RemainderOffsetInfo[Remainder].second++;
917  }
918  }
919  // Currently we choose the most profitable base as the one which has the max
920  // number of load/store with same remainder.
921  // FIXME: adjust the base selection strategy according to load/store offset
922  // distribution.
923  // For example, if we have one candidate chain for DS form preparation, which
924  // contains following load/stores with different remainders:
925  // 1: 10 load/store whose remainder is 1;
926  // 2: 9 load/store whose remainder is 2;
927  // 3: 1 for remainder 3 and 0 for remainder 0;
928  // Now we will choose the first load/store whose remainder is 1 as base and
929  // adjust all other load/stores according to new base, so we will get 10 DS
930  // form and 10 X form.
931  // But we should be more clever, for this case we could use two bases, one for
932  // remainder 1 and the other for remainder 2, thus we could get 19 DS form and
933  // 1 X form.
934  unsigned MaxCountRemainder = 0;
935  for (unsigned j = 0; j < (unsigned)Form; j++)
936  if ((RemainderOffsetInfo.find(j) != RemainderOffsetInfo.end()) &&
937  RemainderOffsetInfo[j].second >
938  RemainderOffsetInfo[MaxCountRemainder].second)
939  MaxCountRemainder = j;
940 
941  // Abort when there are too few insts with common base.
942  if (RemainderOffsetInfo[MaxCountRemainder].second < DispFormPrepMinThreshold)
943  return false;
944 
945  // If the first value is most profitable, no needed to adjust BucketChain
946  // elements as they are substracted the first value when collecting.
947  if (MaxCountRemainder == 0)
948  return true;
949 
950  // Adjust load/store to the new chosen base.
951  const SCEV *Offset =
952  BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first].Offset;
953  BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);
954  for (auto &E : BucketChain.Elements) {
955  if (E.Offset)
956  E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
957  else
958  E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
959  }
960 
961  std::swap(BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first],
962  BucketChain.Elements[0]);
963  return true;
964 }
965 
966 // FIXME: implement a more clever base choosing policy.
967 // Currently we always choose an exist load/store offset. This maybe lead to
968 // suboptimal code sequences. For example, for one DS chain with offsets
969 // {-32769, 2003, 2007, 2011}, we choose -32769 as base offset, and left disp
970 // for load/stores are {0, 34772, 34776, 34780}. Though each offset now is a
971 // multipler of 4, it cannot be represented by sint16.
972 bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
973  // We have a choice now of which instruction's memory operand we use as the
974  // base for the generated PHI. Always picking the first instruction in each
975  // bucket does not work well, specifically because that instruction might
976  // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
977  // the choice is somewhat arbitrary, because the backend will happily
978  // generate direct offsets from both the pre-incremented and
979  // post-incremented pointer values. Thus, we'll pick the first non-prefetch
980  // instruction in each bucket, and adjust the recurrence and other offsets
981  // accordingly.
982  for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) {
983  if (auto *II = dyn_cast<IntrinsicInst>(BucketChain.Elements[j].Instr))
984  if (II->getIntrinsicID() == Intrinsic::prefetch)
985  continue;
986 
987  // If we'd otherwise pick the first element anyway, there's nothing to do.
988  if (j == 0)
989  break;
990 
991  // If our chosen element has no offset from the base pointer, there's
992  // nothing to do.
993  if (!BucketChain.Elements[j].Offset ||
994  cast<SCEVConstant>(BucketChain.Elements[j].Offset)->isZero())
995  break;
996 
997  const SCEV *Offset = BucketChain.Elements[j].Offset;
998  BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);
999  for (auto &E : BucketChain.Elements) {
1000  if (E.Offset)
1001  E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
1002  else
1003  E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
1004  }
1005 
1006  std::swap(BucketChain.Elements[j], BucketChain.Elements[0]);
1007  break;
1008  }
1009  return true;
1010 }
1011 
1012 bool PPCLoopInstrFormPrep::rewriteLoadStores(
1013  Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged,
1014  PrepForm Form) {
1015  bool MadeChange = false;
1016 
1017  const SCEVAddRecExpr *BasePtrSCEV =
1018  cast<SCEVAddRecExpr>(BucketChain.BaseSCEV);
1019  if (!BasePtrSCEV->isAffine())
1020  return MadeChange;
1021 
1022  BasicBlock *Header = L->getHeader();
1023  SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(),
1024  "loopprepare-formrewrite");
1025  if (!SCEVE.isSafeToExpand(BasePtrSCEV->getStart()))
1026  return MadeChange;
1027 
1028  SmallPtrSet<Value *, 16> DeletedPtrs;
1029 
1030  // For some DS form load/store instructions, it can also be an update form,
1031  // if the stride is constant and is a multipler of 4. Use update form if
1032  // prefer it.
1033  bool CanPreInc = (Form == UpdateForm ||
1034  ((Form == DSForm) &&
1035  isa<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE)) &&
1036  !cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE))
1037  ->getAPInt()
1038  .urem(4) &&
1039  PreferUpdateForm));
1040 
1041  std::pair<Instruction *, Instruction *> Base =
1042  rewriteForBase(L, BasePtrSCEV, BucketChain.Elements.begin()->Instr,
1043  CanPreInc, Form, SCEVE, DeletedPtrs);
1044 
1045  if (!Base.first || !Base.second)
1046  return MadeChange;
1047 
1048  // Keep track of the replacement pointer values we've inserted so that we
1049  // don't generate more pointer values than necessary.
1050  SmallPtrSet<Value *, 16> NewPtrs;
1051  NewPtrs.insert(Base.first);
1052 
1053  for (const BucketElement &BE : llvm::drop_begin(BucketChain.Elements)) {
1054  Value *Ptr = getPointerOperandAndType(BE.Instr);
1055  assert(Ptr && "No pointer operand");
1056  if (NewPtrs.count(Ptr))
1057  continue;
1058 
1059  Instruction *NewPtr = rewriteForBucketElement(
1060  Base, BE,
1061  BE.Offset ? cast<SCEVConstant>(BE.Offset)->getValue() : nullptr,
1062  DeletedPtrs);
1063  assert(NewPtr && "wrong rewrite!\n");
1064  NewPtrs.insert(NewPtr);
1065  }
1066 
1067  // Clear the rewriter cache, because values that are in the rewriter's cache
1068  // can be deleted below, causing the AssertingVH in the cache to trigger.
1069  SCEVE.clear();
1070 
1071  for (auto *Ptr : DeletedPtrs) {
1072  if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
1073  BBChanged.insert(IDel->getParent());
1075  }
1076 
1077  MadeChange = true;
1078 
1079  SuccPrepCount++;
1080 
1081  if (Form == DSForm && !CanPreInc)
1082  DSFormChainRewritten++;
1083  else if (Form == DQForm)
1084  DQFormChainRewritten++;
1085  else if (Form == UpdateForm || (Form == DSForm && CanPreInc))
1086  UpdFormChainRewritten++;
1087 
1088  return MadeChange;
1089 }
1090 
1091 bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L,
1092  SmallVector<Bucket, 16> &Buckets) {
1093  bool MadeChange = false;
1094  if (Buckets.empty())
1095  return MadeChange;
1096  SmallSet<BasicBlock *, 16> BBChanged;
1097  for (auto &Bucket : Buckets)
1098  // The base address of each bucket is transformed into a phi and the others
1099  // are rewritten based on new base.
1100  if (prepareBaseForUpdateFormChain(Bucket))
1101  MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, UpdateForm);
1102 
1103  if (MadeChange)
1104  for (auto *BB : BBChanged)
1105  DeleteDeadPHIs(BB);
1106  return MadeChange;
1107 }
1108 
1109 bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L,
1110  SmallVector<Bucket, 16> &Buckets,
1111  PrepForm Form) {
1112  bool MadeChange = false;
1113 
1114  if (Buckets.empty())
1115  return MadeChange;
1116 
1117  SmallSet<BasicBlock *, 16> BBChanged;
1118  for (auto &Bucket : Buckets) {
1119  if (Bucket.Elements.size() < DispFormPrepMinThreshold)
1120  continue;
1121  if (prepareBaseForDispFormChain(Bucket, Form))
1122  MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, Form);
1123  }
1124 
1125  if (MadeChange)
1126  for (auto *BB : BBChanged)
1127  DeleteDeadPHIs(BB);
1128  return MadeChange;
1129 }
1130 
1131 // Find the loop invariant increment node for SCEV BasePtrIncSCEV.
1132 // bb.loop.preheader:
1133 // %start = ...
1134 // bb.loop.body:
1135 // %phinode = phi [ %start, %bb.loop.preheader ], [ %add, %bb.loop.body ]
1136 // ...
1137 // %add = add %phinode, %inc ; %inc is what we want to get.
1138 //
1139 Value *PPCLoopInstrFormPrep::getNodeForInc(Loop *L, Instruction *MemI,
1140  const SCEV *BasePtrIncSCEV) {
1141  // If the increment is a constant, no definition is needed.
1142  // Return the value directly.
1143  if (isa<SCEVConstant>(BasePtrIncSCEV))
1144  return cast<SCEVConstant>(BasePtrIncSCEV)->getValue();
1145 
1146  if (!SE->isLoopInvariant(BasePtrIncSCEV, L))
1147  return nullptr;
1148 
1149  BasicBlock *BB = MemI->getParent();
1150  if (!BB)
1151  return nullptr;
1152 
1153  BasicBlock *LatchBB = L->getLoopLatch();
1154 
1155  if (!LatchBB)
1156  return nullptr;
1157 
1158  // Run through the PHIs and check their operands to find valid representation
1159  // for the increment SCEV.
1160  iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
1161  for (auto &CurrentPHI : PHIIter) {
1162  PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
1163  if (!CurrentPHINode)
1164  continue;
1165 
1166  if (!SE->isSCEVable(CurrentPHINode->getType()))
1167  continue;
1168 
1169  const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);
1170 
1171  const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
1172  if (!PHIBasePtrSCEV)
1173  continue;
1174 
1175  const SCEV *PHIBasePtrIncSCEV = PHIBasePtrSCEV->getStepRecurrence(*SE);
1176 
1177  if (!PHIBasePtrIncSCEV || (PHIBasePtrIncSCEV != BasePtrIncSCEV))
1178  continue;
1179 
1180  // Get the incoming value from the loop latch and check if the value has
1181  // the add form with the required increment.
1182  if (Instruction *I = dyn_cast<Instruction>(
1183  CurrentPHINode->getIncomingValueForBlock(LatchBB))) {
1184  Value *StrippedBaseI = I;
1185  while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBaseI))
1186  StrippedBaseI = BC->getOperand(0);
1187 
1188  Instruction *StrippedI = dyn_cast<Instruction>(StrippedBaseI);
1189  if (!StrippedI)
1190  continue;
1191 
1192  // LSR pass may add a getelementptr instruction to do the loop increment,
1193  // also search in that getelementptr instruction.
1194  if (StrippedI->getOpcode() == Instruction::Add ||
1195  (StrippedI->getOpcode() == Instruction::GetElementPtr &&
1196  StrippedI->getNumOperands() == 2)) {
1197  if (SE->getSCEVAtScope(StrippedI->getOperand(0), L) == BasePtrIncSCEV)
1198  return StrippedI->getOperand(0);
1199  if (SE->getSCEVAtScope(StrippedI->getOperand(1), L) == BasePtrIncSCEV)
1200  return StrippedI->getOperand(1);
1201  }
1202  }
1203  }
1204  return nullptr;
1205 }
1206 
1207 // In order to prepare for the preferred instruction form, a PHI is added.
1208 // This function will check to see if that PHI already exists and will return
1209 // true if it found an existing PHI with the matched start and increment as the
1210 // one we wanted to create.
1211 bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,
1212  const SCEV *BasePtrStartSCEV,
1213  const SCEV *BasePtrIncSCEV,
1214  PrepForm Form) {
1215  BasicBlock *BB = MemI->getParent();
1216  if (!BB)
1217  return false;
1218 
1219  BasicBlock *PredBB = L->getLoopPredecessor();
1220  BasicBlock *LatchBB = L->getLoopLatch();
1221 
1222  if (!PredBB || !LatchBB)
1223  return false;
1224 
1225  // Run through the PHIs and see if we have some that looks like a preparation
1226  iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
1227  for (auto & CurrentPHI : PHIIter) {
1228  PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
1229  if (!CurrentPHINode)
1230  continue;
1231 
1232  if (!SE->isSCEVable(CurrentPHINode->getType()))
1233  continue;
1234 
1235  const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);
1236 
1237  const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
1238  if (!PHIBasePtrSCEV)
1239  continue;
1240 
1241  const SCEVConstant *PHIBasePtrIncSCEV =
1242  dyn_cast<SCEVConstant>(PHIBasePtrSCEV->getStepRecurrence(*SE));
1243  if (!PHIBasePtrIncSCEV)
1244  continue;
1245 
1246  if (CurrentPHINode->getNumIncomingValues() == 2) {
1247  if ((CurrentPHINode->getIncomingBlock(0) == LatchBB &&
1248  CurrentPHINode->getIncomingBlock(1) == PredBB) ||
1249  (CurrentPHINode->getIncomingBlock(1) == LatchBB &&
1250  CurrentPHINode->getIncomingBlock(0) == PredBB)) {
1251  if (PHIBasePtrIncSCEV == BasePtrIncSCEV) {
1252  // The existing PHI (CurrentPHINode) has the same start and increment
1253  // as the PHI that we wanted to create.
1254  if ((Form == UpdateForm || Form == ChainCommoning ) &&
1255  PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) {
1256  ++PHINodeAlreadyExistsUpdate;
1257  return true;
1258  }
1259  if (Form == DSForm || Form == DQForm) {
1260  const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
1261  SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV));
1262  if (Diff && !Diff->getAPInt().urem(Form)) {
1263  if (Form == DSForm)
1264  ++PHINodeAlreadyExistsDS;
1265  else
1266  ++PHINodeAlreadyExistsDQ;
1267  return true;
1268  }
1269  }
1270  }
1271  }
1272  }
1273  }
1274  return false;
1275 }
1276 
1277 bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
1278  bool MadeChange = false;
1279 
1280  // Only prep. the inner-most loop
1281  if (!L->isInnermost())
1282  return MadeChange;
1283 
1284  // Return if already done enough preparation.
1285  if (SuccPrepCount >= MaxVarsPrep)
1286  return MadeChange;
1287 
1288  LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
1289 
1290  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
1291  // If there is no loop predecessor, or the loop predecessor's terminator
1292  // returns a value (which might contribute to determining the loop's
1293  // iteration space), insert a new preheader for the loop.
1294  if (!LoopPredecessor ||
1295  !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
1296  LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
1297  if (LoopPredecessor)
1298  MadeChange = true;
1299  }
1300  if (!LoopPredecessor) {
1301  LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n");
1302  return MadeChange;
1303  }
1304  // Check if a load/store has update form. This lambda is used by function
1305  // collectCandidates which can collect candidates for types defined by lambda.
1306  auto isUpdateFormCandidate = [&](const Instruction *I, Value *PtrValue,
1307  const Type *PointerElementType) {
1308  assert((PtrValue && I) && "Invalid parameter!");
1309  // There are no update forms for Altivec vector load/stores.
1310  if (ST && ST->hasAltivec() && PointerElementType->isVectorTy())
1311  return false;
1312  // There are no update forms for P10 lxvp/stxvp intrinsic.
1313  auto *II = dyn_cast<IntrinsicInst>(I);
1314  if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) ||
1315  II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp))
1316  return false;
1317  // See getPreIndexedAddressParts, the displacement for LDU/STDU has to
1318  // be 4's multiple (DS-form). For i64 loads/stores when the displacement
1319  // fits in a 16-bit signed field but isn't a multiple of 4, it will be
1320  // useless and possible to break some original well-form addressing mode
1321  // to make this pre-inc prep for it.
1322  if (PointerElementType->isIntegerTy(64)) {
1323  const SCEV *LSCEV = SE->getSCEVAtScope(const_cast<Value *>(PtrValue), L);
1324  const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);
1325  if (!LARSCEV || LARSCEV->getLoop() != L)
1326  return false;
1327  if (const SCEVConstant *StepConst =
1328  dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) {
1329  const APInt &ConstInt = StepConst->getValue()->getValue();
1330  if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0)
1331  return false;
1332  }
1333  }
1334  return true;
1335  };
1336 
1337  // Check if a load/store has DS form.
1338  auto isDSFormCandidate = [](const Instruction *I, Value *PtrValue,
1339  const Type *PointerElementType) {
1340  assert((PtrValue && I) && "Invalid parameter!");
1341  if (isa<IntrinsicInst>(I))
1342  return false;
1343  return (PointerElementType->isIntegerTy(64)) ||
1344  (PointerElementType->isFloatTy()) ||
1345  (PointerElementType->isDoubleTy()) ||
1346  (PointerElementType->isIntegerTy(32) &&
1347  llvm::any_of(I->users(),
1348  [](const User *U) { return isa<SExtInst>(U); }));
1349  };
1350 
1351  // Check if a load/store has DQ form.
1352  auto isDQFormCandidate = [&](const Instruction *I, Value *PtrValue,
1353  const Type *PointerElementType) {
1354  assert((PtrValue && I) && "Invalid parameter!");
1355  // Check if it is a P10 lxvp/stxvp intrinsic.
1356  auto *II = dyn_cast<IntrinsicInst>(I);
1357  if (II)
1358  return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp ||
1359  II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp;
1360  // Check if it is a P9 vector load/store.
1361  return ST && ST->hasP9Vector() && (PointerElementType->isVectorTy());
1362  };
1363 
1364  // Check if a load/store is candidate for chain commoning.
1365  // If the SCEV is only with one ptr operand in its start, we can use that
1366  // start as a chain separator. Mark this load/store as a candidate.
1367  auto isChainCommoningCandidate = [&](const Instruction *I, Value *PtrValue,
1368  const Type *PointerElementType) {
1369  const SCEVAddRecExpr *ARSCEV =
1370  cast<SCEVAddRecExpr>(SE->getSCEVAtScope(PtrValue, L));
1371  if (!ARSCEV)
1372  return false;
1373 
1374  if (!ARSCEV->isAffine())
1375  return false;
1376 
1377  const SCEV *Start = ARSCEV->getStart();
1378 
1379  // A single pointer. We can treat it as offset 0.
1380  if (isa<SCEVUnknown>(Start) && Start->getType()->isPointerTy())
1381  return true;
1382 
1383  const SCEVAddExpr *ASCEV = dyn_cast<SCEVAddExpr>(Start);
1384 
1385  // We need a SCEVAddExpr to include both base and offset.
1386  if (!ASCEV)
1387  return false;
1388 
1389  // Make sure there is only one pointer operand(base) and all other operands
1390  // are integer type.
1391  bool SawPointer = false;
1392  for (const SCEV *Op : ASCEV->operands()) {
1393  if (Op->getType()->isPointerTy()) {
1394  if (SawPointer)
1395  return false;
1396  SawPointer = true;
1397  } else if (!Op->getType()->isIntegerTy())
1398  return false;
1399  }
1400 
1401  return SawPointer;
1402  };
1403 
1404  // Check if the diff is a constant type. This is used for update/DS/DQ form
1405  // preparation.
1406  auto isValidConstantDiff = [](const SCEV *Diff) {
1407  return dyn_cast<SCEVConstant>(Diff) != nullptr;
1408  };
1409 
1410  // Make sure the diff between the base and new candidate is required type.
1411  // This is used for chain commoning preparation.
1412  auto isValidChainCommoningDiff = [](const SCEV *Diff) {
1413  assert(Diff && "Invalid Diff!\n");
1414 
1415  // Don't mess up previous dform prepare.
1416  if (isa<SCEVConstant>(Diff))
1417  return false;
1418 
1419  // A single integer type offset.
1420  if (isa<SCEVUnknown>(Diff) && Diff->getType()->isIntegerTy())
1421  return true;
1422 
1423  const SCEVNAryExpr *ADiff = dyn_cast<SCEVNAryExpr>(Diff);
1424  if (!ADiff)
1425  return false;
1426 
1427  for (const SCEV *Op : ADiff->operands())
1428  if (!Op->getType()->isIntegerTy())
1429  return false;
1430 
1431  return true;
1432  };
1433 
1434  HasCandidateForPrepare = false;
1435 
1436  LLVM_DEBUG(dbgs() << "Start to prepare for update form.\n");
1437  // Collect buckets of comparable addresses used by loads and stores for update
1438  // form.
1439  SmallVector<Bucket, 16> UpdateFormBuckets = collectCandidates(
1440  L, isUpdateFormCandidate, isValidConstantDiff, MaxVarsUpdateForm);
1441 
1442  // Prepare for update form.
1443  if (!UpdateFormBuckets.empty())
1444  MadeChange |= updateFormPrep(L, UpdateFormBuckets);
1445  else if (!HasCandidateForPrepare) {
1446  LLVM_DEBUG(
1447  dbgs()
1448  << "No prepare candidates found, stop praparation for current loop!\n");
1449  // If no candidate for preparing, return early.
1450  return MadeChange;
1451  }
1452 
1453  LLVM_DEBUG(dbgs() << "Start to prepare for DS form.\n");
1454  // Collect buckets of comparable addresses used by loads and stores for DS
1455  // form.
1456  SmallVector<Bucket, 16> DSFormBuckets = collectCandidates(
1457  L, isDSFormCandidate, isValidConstantDiff, MaxVarsDSForm);
1458 
1459  // Prepare for DS form.
1460  if (!DSFormBuckets.empty())
1461  MadeChange |= dispFormPrep(L, DSFormBuckets, DSForm);
1462 
1463  LLVM_DEBUG(dbgs() << "Start to prepare for DQ form.\n");
1464  // Collect buckets of comparable addresses used by loads and stores for DQ
1465  // form.
1466  SmallVector<Bucket, 16> DQFormBuckets = collectCandidates(
1467  L, isDQFormCandidate, isValidConstantDiff, MaxVarsDQForm);
1468 
1469  // Prepare for DQ form.
1470  if (!DQFormBuckets.empty())
1471  MadeChange |= dispFormPrep(L, DQFormBuckets, DQForm);
1472 
1473  // Collect buckets of comparable addresses used by loads and stores for chain
1474  // commoning. With chain commoning, we reuse offsets between the chains, so
1475  // the register pressure will be reduced.
1476  if (!EnableChainCommoning) {
1477  LLVM_DEBUG(dbgs() << "Chain commoning is not enabled.\n");
1478  return MadeChange;
1479  }
1480 
1481  LLVM_DEBUG(dbgs() << "Start to prepare for chain commoning.\n");
1482  SmallVector<Bucket, 16> Buckets =
1483  collectCandidates(L, isChainCommoningCandidate, isValidChainCommoningDiff,
1485 
1486  // Prepare for chain commoning.
1487  if (!Buckets.empty())
1488  MadeChange |= chainCommoning(L, Buckets);
1489 
1490  return MadeChange;
1491 }
i
i
Definition: README.txt:29
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:518
llvm::GetElementPtrInst::setIsInBounds
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Definition: Instructions.cpp:1924
MaxVarsChainCommon
static cl::opt< unsigned > MaxVarsChainCommon("ppc-chaincommon-max-vars", cl::Hidden, cl::init(4), cl::desc("Bucket number per loop for PPC loop chain common"))
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm::ScalarEvolution::getNegativeSCEV
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Definition: ScalarEvolution.cpp:4463
GEPNodeIncNameSuffix
static constexpr StringRef GEPNodeIncNameSuffix
Definition: PPCLoopInstrFormPrep.cpp:349
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:387
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:370
IntrinsicInst.h
ScalarEvolutionExpander.h
llvm::APInt::isSignedIntN
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:420
MaxVarsDQForm
static cl::opt< unsigned > MaxVarsDQForm("ppc-dqprep-max-vars", cl::Hidden, cl::init(8), cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"))
Scalar.h
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:353
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
Pass.h
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:50
CastNodeNameSuffix
static constexpr StringRef CastNodeNameSuffix
Definition: PPCLoopInstrFormPrep.cpp:348
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5255
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::pred_size
unsigned pred_size(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:32
llvm::LoopBase::getLoopPredecessor
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
Definition: LoopInfoImpl.h:211
llvm::dwarf::Form
Form
Definition: Dwarf.h:132
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
Local.h
llvm::createPPCLoopInstrFormPrepPass
FunctionPass * createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM)
Definition: PPCLoopInstrFormPrep.cpp:352
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::SCEVExpander::expandCodeFor
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
Definition: ScalarEvolutionExpander.h:278
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
getPointerOperandAndType
static Value * getPointerOperandAndType(Value *MemI, Type **PtrElementType=nullptr)
Definition: PPCLoopInstrFormPrep.cpp:374
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
PHINodeNameSuffix
static constexpr StringRef PHINodeNameSuffix
Definition: PPCLoopInstrFormPrep.cpp:347
llvm::SmallPtrSet< Value *, 16 >
EnableChainCommoning
static cl::opt< bool > EnableChainCommoning("ppc-formprep-chain-commoning", cl::init(false), cl::Hidden, cl::desc("Enable chain commoning in PPC loop prepare pass."))
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
llvm::SCEVNAryExpr::operands
op_range operands() const
Definition: ScalarEvolutionExpressions.h:211
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::initializePPCLoopInstrFormPrepPass
void initializePPCLoopInstrFormPrepPass(PassRegistry &)
DepthFirstIterator.h
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
PPCSubtarget.h
CommandLine.h
llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:565
llvm::Instruction::insertAfter
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Definition: Instruction.cpp:93
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:164
EnableUpdateFormForNonConstInc
static cl::opt< bool > EnableUpdateFormForNonConstInc("ppc-formprep-update-nonconst-inc", cl::init(false), cl::Hidden, cl::desc("prepare update form when the load/store increment is a loop " "invariant non-const value."))
MaxVarsPrep
static cl::opt< unsigned > MaxVarsPrep("ppc-formprep-max-vars", cl::Hidden, cl::init(24), cl::desc("Potential common base number threshold per function " "for PPC loop prep"))
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::PPCSubtarget
Definition: PPCSubtarget.h:71
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
MaxVarsDSForm
static cl::opt< unsigned > MaxVarsDSForm("ppc-dsprep-max-vars", cl::Hidden, cl::init(3), cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form"))
llvm::User
Definition: User.h:44
name
static const char * name
Definition: PPCLoopInstrFormPrep.cpp:341
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:246
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
false
Definition: StackSlotColoring.cpp:141
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2883
llvm::Instruction
Definition: Instruction.h:42
MaxVarsUpdateForm
static cl::opt< unsigned > MaxVarsUpdateForm("ppc-preinc-prep-max-vars", cl::Hidden, cl::init(3), cl::desc("Potential PHI threshold per loop for PPC loop prep of update " "form"))
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
PPC.h
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
LoopUtils.h
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2185
SmallPtrSet.h
llvm::SCEVNAryExpr
This node is a base class providing common functionality for n'ary operators.
Definition: ScalarEvolutionExpressions.h:184
Utils.h
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2790
ChainCommonPrepMinThreshold
static cl::opt< unsigned > ChainCommonPrepMinThreshold("ppc-chaincommon-min-threshold", cl::Hidden, cl::init(4), cl::desc("Minimal common base load/store instructions triggering chain " "commoning preparation. Must be not smaller than 4"))
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
llvm::APInt::srem
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1734
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
LoopInfo.h
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:210
llvm::InsertPreheaderForLoop
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
Definition: LoopSimplify.cpp:118
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1412
DispFormPrepMinThreshold
static cl::opt< unsigned > DispFormPrepMinThreshold("ppc-dispprep-min-threshold", cl::Hidden, cl::init(2), cl::desc("Minimal common base load/store instructions triggering DS/DQ form " "preparation"))
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:264
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:297
inc
Current eax eax eax ret Ideal eax eax ret Re implement atomic builtins x86 does not have to use add to implement these it can use inc
Definition: README.txt:1367
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:416
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2848
llvm::DenseMap
Definition: DenseMap.h:714
llvm::logicalview::LVPrintKind::Elements
@ Elements
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:928
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:60
GEPNodeOffNameSuffix
static constexpr StringRef GEPNodeOffNameSuffix
Definition: PPCLoopInstrFormPrep.cpp:350
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::SCEVExpander::isSafeToExpand
bool isSafeToExpand(const SCEV *S) const
Return true if the given expression is safe to expand in the sense that all materialized values are s...
Definition: ScalarEvolutionExpander.cpp:2609
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::SCEVExpander::clear
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
Definition: ScalarEvolutionExpander.h:186
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::APInt::urem
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1664
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
getInstrName
static std::string getInstrName(const Value *I, StringRef Suffix)
Definition: PPCLoopInstrFormPrep.cpp:366
prefetch
loop data prefetch
Definition: LoopDataPrefetch.cpp:149
llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:954
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1716
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
DEBUG_TYPE
#define DEBUG_TYPE
Definition: PPCLoopInstrFormPrep.cpp:115
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:804
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
PreferUpdateForm
static cl::opt< bool > PreferUpdateForm("ppc-formprep-prefer-update", cl::init(true), cl::Hidden, cl::desc("prefer update form when ds form is also a update form"))
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:173
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13625
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:230
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:85
j
return j(j<< 16)
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:182
llvm::LCSSAID
char & LCSSAID
Definition: LCSSA.cpp:492
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4549
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2740
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Casting.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:4327
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:178
llvm::PPCTargetMachine
Common code between 32-bit and 64-bit PowerPC targets.
Definition: PPCTargetMachine.h:25
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:163
llvm::SCEVConstant::getAPInt
const APInt & getAPInt() const
Definition: ScalarEvolutionExpressions.h:70
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
ScalarEvolutionExpressions.h
IsPtrInBounds
static bool IsPtrInBounds(Value *BasePtr)
Definition: PPCLoopInstrFormPrep.cpp:356
Instructions.h
llvm::numbers::phi
constexpr double phi
Definition: MathExtras.h:67
llvm::SCEVAddExpr
This node represents an addition of some number of SCEVs.
Definition: ScalarEvolutionExpressions.h:257
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
SmallVector.h
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2814
llvm::iterator_range
A range adaptor for a pair of iterators.
Definition: iterator_range.h:30
llvm::PHINode
Definition: Instructions.h:2698
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:402
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2492
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:413
BasicBlockUtils.h
Value.h
InitializePasses.h
llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:9609
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
PPCTargetMachine.h
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365