LLVM  15.0.0git
PPCLoopInstrFormPrep.cpp
Go to the documentation of this file.
1 //===------ PPCLoopInstrFormPrep.cpp - Loop Instr Form Prep Pass ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a pass to prepare loops for ppc preferred addressing
10 // modes, leveraging different instruction form. (eg: DS/DQ form, D/DS form with
11 // update)
12 // Additional PHIs are created for loop induction variables used by load/store
13 // instructions so that preferred addressing modes can be used.
14 //
15 // 1: DS/DQ form preparation, prepare the load/store instructions so that they
16 // can satisfy the DS/DQ form displacement requirements.
17 // Generically, this means transforming loops like this:
18 // for (int i = 0; i < n; ++i) {
19 // unsigned long x1 = *(unsigned long *)(p + i + 5);
20 // unsigned long x2 = *(unsigned long *)(p + i + 9);
21 // }
22 //
23 // to look like this:
24 //
25 // unsigned NewP = p + 5;
26 // for (int i = 0; i < n; ++i) {
27 // unsigned long x1 = *(unsigned long *)(i + NewP);
28 // unsigned long x2 = *(unsigned long *)(i + NewP + 4);
29 // }
30 //
31 // 2: D/DS form with update preparation, prepare the load/store instructions so
32 // that we can use update form to do pre-increment.
33 // Generically, this means transforming loops like this:
34 // for (int i = 0; i < n; ++i)
35 // array[i] = c;
36 //
37 // to look like this:
38 //
39 // T *p = array[-1];
40 // for (int i = 0; i < n; ++i)
41 // *++p = c;
42 //
43 // 3: common multiple chains for the load/stores with same offsets in the loop,
44 // so that we can reuse the offsets and reduce the register pressure in the
45 // loop. This transformation can also increase the loop ILP as now each chain
46 // uses its own loop induction add/addi. But this will increase the number of
47 // add/addi in the loop.
48 //
49 // Generically, this means transforming loops like this:
50 //
51 // char *p;
52 // A1 = p + base1
53 // A2 = p + base1 + offset
54 // B1 = p + base2
55 // B2 = p + base2 + offset
56 //
57 // for (int i = 0; i < n; i++)
58 // unsigned long x1 = *(unsigned long *)(A1 + i);
59 // unsigned long x2 = *(unsigned long *)(A2 + i)
60 // unsigned long x3 = *(unsigned long *)(B1 + i);
61 // unsigned long x4 = *(unsigned long *)(B2 + i);
62 // }
63 //
64 // to look like this:
65 //
66 // A1_new = p + base1 // chain 1
67 // B1_new = p + base2 // chain 2, now inside the loop, common offset is
68 // // reused.
69 //
70 // for (long long i = 0; i < n; i+=count) {
71 // unsigned long x1 = *(unsigned long *)(A1_new + i);
72 // unsigned long x2 = *(unsigned long *)((A1_new + i) + offset);
73 // unsigned long x3 = *(unsigned long *)(B1_new + i);
74 // unsigned long x4 = *(unsigned long *)((B1_new + i) + offset);
75 // }
76 //===----------------------------------------------------------------------===//
77 
78 #include "PPC.h"
79 #include "PPCSubtarget.h"
80 #include "PPCTargetMachine.h"
82 #include "llvm/ADT/SmallPtrSet.h"
83 #include "llvm/ADT/SmallSet.h"
84 #include "llvm/ADT/SmallVector.h"
85 #include "llvm/ADT/Statistic.h"
86 #include "llvm/Analysis/LoopInfo.h"
89 #include "llvm/IR/BasicBlock.h"
90 #include "llvm/IR/CFG.h"
91 #include "llvm/IR/Dominators.h"
92 #include "llvm/IR/Instruction.h"
93 #include "llvm/IR/Instructions.h"
94 #include "llvm/IR/IntrinsicInst.h"
95 #include "llvm/IR/IntrinsicsPowerPC.h"
96 #include "llvm/IR/Module.h"
97 #include "llvm/IR/Type.h"
98 #include "llvm/IR/Value.h"
99 #include "llvm/InitializePasses.h"
100 #include "llvm/Pass.h"
101 #include "llvm/Support/Casting.h"
103 #include "llvm/Support/Debug.h"
104 #include "llvm/Transforms/Scalar.h"
105 #include "llvm/Transforms/Utils.h"
110 #include <cassert>
111 #include <iterator>
112 #include <utility>
113 
114 #define DEBUG_TYPE "ppc-loop-instr-form-prep"
115 
116 using namespace llvm;
117 
118 static cl::opt<unsigned>
119  MaxVarsPrep("ppc-formprep-max-vars", cl::Hidden, cl::init(24),
121  cl::desc("Potential common base number threshold per function "
122  "for PPC loop prep"));
123 
124 static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update",
125  cl::init(true), cl::Hidden,
126  cl::desc("prefer update form when ds form is also a update form"));
127 
129  "ppc-formprep-update-nonconst-inc", cl::init(false), cl::Hidden,
130  cl::desc("prepare update form when the load/store increment is a loop "
131  "invariant non-const value."));
132 
134  "ppc-formprep-chain-commoning", cl::init(false), cl::Hidden,
135  cl::desc("Enable chain commoning in PPC loop prepare pass."));
136 
137 // Sum of following 3 per loop thresholds for all loops can not be larger
138 // than MaxVarsPrep.
139 // now the thresholds for each kind prep are exterimental values on Power9.
140 static cl::opt<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars",
141  cl::Hidden, cl::init(3),
142  cl::desc("Potential PHI threshold per loop for PPC loop prep of update "
143  "form"));
144 
145 static cl::opt<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars",
146  cl::Hidden, cl::init(3),
147  cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form"));
148 
149 static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars",
150  cl::Hidden, cl::init(8),
151  cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"));
152 
153 // Commoning chain will reduce the register pressure, so we don't consider about
154 // the PHI nodes number.
155 // But commoning chain will increase the addi/add number in the loop and also
156 // increase loop ILP. Maximum chain number should be same with hardware
157 // IssueWidth, because we won't benefit from ILP if the parallel chains number
158 // is bigger than IssueWidth. We assume there are 2 chains in one bucket, so
159 // there would be 4 buckets at most on P9(IssueWidth is 8).
161  "ppc-chaincommon-max-vars", cl::Hidden, cl::init(4),
162  cl::desc("Bucket number per loop for PPC loop chain common"));
163 
164 // If would not be profitable if the common base has only one load/store, ISEL
165 // should already be able to choose best load/store form based on offset for
166 // single load/store. Set minimal profitable value default to 2 and make it as
167 // an option.
168 static cl::opt<unsigned> DispFormPrepMinThreshold("ppc-dispprep-min-threshold",
169  cl::Hidden, cl::init(2),
170  cl::desc("Minimal common base load/store instructions triggering DS/DQ form "
171  "preparation"));
172 
174  "ppc-chaincommon-min-threshold", cl::Hidden, cl::init(4),
175  cl::desc("Minimal common base load/store instructions triggering chain "
176  "commoning preparation. Must be not smaller than 4"));
177 
178 STATISTIC(PHINodeAlreadyExistsUpdate, "PHI node already in pre-increment form");
179 STATISTIC(PHINodeAlreadyExistsDS, "PHI node already in DS form");
180 STATISTIC(PHINodeAlreadyExistsDQ, "PHI node already in DQ form");
181 STATISTIC(DSFormChainRewritten, "Num of DS form chain rewritten");
182 STATISTIC(DQFormChainRewritten, "Num of DQ form chain rewritten");
183 STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten");
184 STATISTIC(ChainCommoningRewritten, "Num of commoning chains");
185 
186 namespace {
187  struct BucketElement {
188  BucketElement(const SCEV *O, Instruction *I) : Offset(O), Instr(I) {}
189  BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
190 
191  const SCEV *Offset;
192  Instruction *Instr;
193  };
194 
195  struct Bucket {
196  Bucket(const SCEV *B, Instruction *I)
197  : BaseSCEV(B), Elements(1, BucketElement(I)) {
198  ChainSize = 0;
199  }
200 
201  // The base of the whole bucket.
202  const SCEV *BaseSCEV;
203 
204  // All elements in the bucket. In the bucket, the element with the BaseSCEV
205  // has no offset and all other elements are stored as offsets to the
206  // BaseSCEV.
208 
209  // The potential chains size. This is used for chain commoning only.
210  unsigned ChainSize;
211 
212  // The base for each potential chain. This is used for chain commoning only.
214  };
215 
216  // "UpdateForm" is not a real PPC instruction form, it stands for dform
217  // load/store with update like ldu/stdu, or Prefetch intrinsic.
218  // For DS form instructions, their displacements must be multiple of 4.
219  // For DQ form instructions, their displacements must be multiple of 16.
220  enum PrepForm { UpdateForm = 1, DSForm = 4, DQForm = 16, ChainCommoning };
221 
222  class PPCLoopInstrFormPrep : public FunctionPass {
223  public:
224  static char ID; // Pass ID, replacement for typeid
225 
226  PPCLoopInstrFormPrep() : FunctionPass(ID) {
228  }
229 
230  PPCLoopInstrFormPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
232  }
233 
234  void getAnalysisUsage(AnalysisUsage &AU) const override {
239  }
240 
241  bool runOnFunction(Function &F) override;
242 
243  private:
244  PPCTargetMachine *TM = nullptr;
245  const PPCSubtarget *ST;
246  DominatorTree *DT;
247  LoopInfo *LI;
248  ScalarEvolution *SE;
249  bool PreserveLCSSA;
250  bool HasCandidateForPrepare;
251 
252  /// Successful preparation number for Update/DS/DQ form in all inner most
253  /// loops. One successful preparation will put one common base out of loop,
254  /// this may leads to register presure like LICM does.
255  /// Make sure total preparation number can be controlled by option.
256  unsigned SuccPrepCount;
257 
258  bool runOnLoop(Loop *L);
259 
260  /// Check if required PHI node is already exist in Loop \p L.
261  bool alreadyPrepared(Loop *L, Instruction *MemI,
262  const SCEV *BasePtrStartSCEV,
263  const SCEV *BasePtrIncSCEV, PrepForm Form);
264 
265  /// Get the value which defines the increment SCEV \p BasePtrIncSCEV.
266  Value *getNodeForInc(Loop *L, Instruction *MemI,
267  const SCEV *BasePtrIncSCEV);
268 
269  /// Common chains to reuse offsets for a loop to reduce register pressure.
270  bool chainCommoning(Loop *L, SmallVector<Bucket, 16> &Buckets);
271 
272  /// Find out the potential commoning chains and their bases.
273  bool prepareBasesForCommoningChains(Bucket &BucketChain);
274 
275  /// Rewrite load/store according to the common chains.
276  bool
277  rewriteLoadStoresForCommoningChains(Loop *L, Bucket &Bucket,
278  SmallSet<BasicBlock *, 16> &BBChanged);
279 
280  /// Collect condition matched(\p isValidCandidate() returns true)
281  /// candidates in Loop \p L.
282  SmallVector<Bucket, 16> collectCandidates(
283  Loop *L,
284  std::function<bool(const Instruction *, Value *, const Type *)>
285  isValidCandidate,
286  std::function<bool(const SCEV *)> isValidDiff,
287  unsigned MaxCandidateNum);
288 
289  /// Add a candidate to candidates \p Buckets if diff between candidate and
290  /// one base in \p Buckets matches \p isValidDiff.
291  void addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
292  SmallVector<Bucket, 16> &Buckets,
293  std::function<bool(const SCEV *)> isValidDiff,
294  unsigned MaxCandidateNum);
295 
296  /// Prepare all candidates in \p Buckets for update form.
297  bool updateFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets);
298 
299  /// Prepare all candidates in \p Buckets for displacement form, now for
300  /// ds/dq.
301  bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets, PrepForm Form);
302 
303  /// Prepare for one chain \p BucketChain, find the best base element and
304  /// update all other elements in \p BucketChain accordingly.
305  /// \p Form is used to find the best base element.
306  /// If success, best base element must be stored as the first element of
307  /// \p BucketChain.
308  /// Return false if no base element found, otherwise return true.
309  bool prepareBaseForDispFormChain(Bucket &BucketChain, PrepForm Form);
310 
311  /// Prepare for one chain \p BucketChain, find the best base element and
312  /// update all other elements in \p BucketChain accordingly.
313  /// If success, best base element must be stored as the first element of
314  /// \p BucketChain.
315  /// Return false if no base element found, otherwise return true.
316  bool prepareBaseForUpdateFormChain(Bucket &BucketChain);
317 
318  /// Rewrite load/store instructions in \p BucketChain according to
319  /// preparation.
320  bool rewriteLoadStores(Loop *L, Bucket &BucketChain,
321  SmallSet<BasicBlock *, 16> &BBChanged,
322  PrepForm Form);
323 
324  /// Rewrite for the base load/store of a chain.
325  std::pair<Instruction *, Instruction *>
326  rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
327  Instruction *BaseMemI, bool CanPreInc, PrepForm Form,
328  SCEVExpander &SCEVE, SmallPtrSet<Value *, 16> &DeletedPtrs);
329 
330  /// Rewrite for the other load/stores of a chain according to the new \p
331  /// Base.
332  Instruction *
333  rewriteForBucketElement(std::pair<Instruction *, Instruction *> Base,
334  const BucketElement &Element, Value *OffToBase,
335  SmallPtrSet<Value *, 16> &DeletedPtrs);
336  };
337 
338 } // end anonymous namespace
339 
340 char PPCLoopInstrFormPrep::ID = 0;
341 static const char *name = "Prepare loop for ppc preferred instruction forms";
342 INITIALIZE_PASS_BEGIN(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
345 INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
346 
347 static constexpr StringRef PHINodeNameSuffix = ".phi";
348 static constexpr StringRef CastNodeNameSuffix = ".cast";
349 static constexpr StringRef GEPNodeIncNameSuffix = ".inc";
350 static constexpr StringRef GEPNodeOffNameSuffix = ".off";
351 
353  return new PPCLoopInstrFormPrep(TM);
354 }
355 
356 static bool IsPtrInBounds(Value *BasePtr) {
357  Value *StrippedBasePtr = BasePtr;
358  while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))
359  StrippedBasePtr = BC->getOperand(0);
360  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr))
361  return GEP->isInBounds();
362 
363  return false;
364 }
365 
366 static std::string getInstrName(const Value *I, StringRef Suffix) {
367  assert(I && "Invalid paramater!");
368  if (I->hasName())
369  return (I->getName() + Suffix).str();
370  else
371  return "";
372 }
373 
375  Type **PtrElementType = nullptr) {
376 
377  Value *PtrValue = nullptr;
378  Type *PointerElementType = nullptr;
379 
380  if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
381  PtrValue = LMemI->getPointerOperand();
382  PointerElementType = LMemI->getType();
383  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
384  PtrValue = SMemI->getPointerOperand();
385  PointerElementType = SMemI->getValueOperand()->getType();
386  } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
387  PointerElementType = Type::getInt8Ty(MemI->getContext());
388  if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
389  IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {
390  PtrValue = IMemI->getArgOperand(0);
391  } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {
392  PtrValue = IMemI->getArgOperand(1);
393  }
394  }
395  /*Get ElementType if PtrElementType is not null.*/
396  if (PtrElementType)
397  *PtrElementType = PointerElementType;
398 
399  return PtrValue;
400 }
401 
403  if (skipFunction(F))
404  return false;
405 
406  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
407  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
408  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
409  DT = DTWP ? &DTWP->getDomTree() : nullptr;
410  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
411  ST = TM ? TM->getSubtargetImpl(F) : nullptr;
412  SuccPrepCount = 0;
413 
414  bool MadeChange = false;
415 
416  for (Loop *I : *LI)
417  for (Loop *L : depth_first(I))
418  MadeChange |= runOnLoop(L);
419 
420  return MadeChange;
421 }
422 
423 // Finding the minimal(chain_number + reusable_offset_number) is a complicated
424 // algorithmic problem.
425 // For now, the algorithm used here is simply adjusted to handle the case for
426 // manually unrolling cases.
427 // FIXME: use a more powerful algorithm to find minimal sum of chain_number and
428 // reusable_offset_number for one base with multiple offsets.
429 bool PPCLoopInstrFormPrep::prepareBasesForCommoningChains(Bucket &CBucket) {
430  // The minimal size for profitable chain commoning:
431  // A1 = base + offset1
432  // A2 = base + offset2 (offset2 - offset1 = X)
433  // A3 = base + offset3
434  // A4 = base + offset4 (offset4 - offset3 = X)
435  // ======>
436  // base1 = base + offset1
437  // base2 = base + offset3
438  // A1 = base1
439  // A2 = base1 + X
440  // A3 = base2
441  // A4 = base2 + X
442  //
443  // There is benefit because of reuse of offest 'X'.
444 
446  "Thredhold can not be smaller than 4!\n");
447  if (CBucket.Elements.size() < ChainCommonPrepMinThreshold)
448  return false;
449 
450  // We simply select the FirstOffset as the first reusable offset between each
451  // chain element 1 and element 0.
452  const SCEV *FirstOffset = CBucket.Elements[1].Offset;
453 
454  // Figure out how many times above FirstOffset is used in the chain.
455  // For a success commoning chain candidate, offset difference between each
456  // chain element 1 and element 0 must be also FirstOffset.
457  unsigned FirstOffsetReusedCount = 1;
458 
459  // Figure out how many times above FirstOffset is used in the first chain.
460  // Chain number is FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain
461  unsigned FirstOffsetReusedCountInFirstChain = 1;
462 
463  unsigned EleNum = CBucket.Elements.size();
464  bool SawChainSeparater = false;
465  for (unsigned j = 2; j != EleNum; ++j) {
466  if (SE->getMinusSCEV(CBucket.Elements[j].Offset,
467  CBucket.Elements[j - 1].Offset) == FirstOffset) {
468  if (!SawChainSeparater)
469  FirstOffsetReusedCountInFirstChain++;
470  FirstOffsetReusedCount++;
471  } else
472  // For now, if we meet any offset which is not FirstOffset, we assume we
473  // find a new Chain.
474  // This makes us miss some opportunities.
475  // For example, we can common:
476  //
477  // {OffsetA, Offset A, OffsetB, OffsetA, OffsetA, OffsetB}
478  //
479  // as two chains:
480  // {{OffsetA, Offset A, OffsetB}, {OffsetA, OffsetA, OffsetB}}
481  // FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 2
482  //
483  // But we fail to common:
484  //
485  // {OffsetA, OffsetB, OffsetA, OffsetA, OffsetB, OffsetA}
486  // FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 1
487 
488  SawChainSeparater = true;
489  }
490 
491  // FirstOffset is not reused, skip this bucket.
492  if (FirstOffsetReusedCount == 1)
493  return false;
494 
495  unsigned ChainNum =
496  FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain;
497 
498  // All elements are increased by FirstOffset.
499  // The number of chains should be sqrt(EleNum).
500  if (!SawChainSeparater)
501  ChainNum = (unsigned)sqrt((double)EleNum);
502 
503  CBucket.ChainSize = (unsigned)(EleNum / ChainNum);
504 
505  // If this is not a perfect chain(eg: not all elements can be put inside
506  // commoning chains.), skip now.
507  if (CBucket.ChainSize * ChainNum != EleNum)
508  return false;
509 
510  if (SawChainSeparater) {
511  // Check that the offset seqs are the same for all chains.
512  for (unsigned i = 1; i < CBucket.ChainSize; i++)
513  for (unsigned j = 1; j < ChainNum; j++)
514  if (CBucket.Elements[i].Offset !=
515  SE->getMinusSCEV(CBucket.Elements[i + j * CBucket.ChainSize].Offset,
516  CBucket.Elements[j * CBucket.ChainSize].Offset))
517  return false;
518  }
519 
520  for (unsigned i = 0; i < ChainNum; i++)
521  CBucket.ChainBases.push_back(CBucket.Elements[i * CBucket.ChainSize]);
522 
523  LLVM_DEBUG(dbgs() << "Bucket has " << ChainNum << " chains.\n");
524 
525  return true;
526 }
527 
528 bool PPCLoopInstrFormPrep::chainCommoning(Loop *L,
529  SmallVector<Bucket, 16> &Buckets) {
530  bool MadeChange = false;
531 
532  if (Buckets.empty())
533  return MadeChange;
534 
535  SmallSet<BasicBlock *, 16> BBChanged;
536 
537  for (auto &Bucket : Buckets) {
538  if (prepareBasesForCommoningChains(Bucket))
539  MadeChange |= rewriteLoadStoresForCommoningChains(L, Bucket, BBChanged);
540  }
541 
542  if (MadeChange)
543  for (auto *BB : BBChanged)
545  return MadeChange;
546 }
547 
548 bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(
549  Loop *L, Bucket &Bucket, SmallSet<BasicBlock *, 16> &BBChanged) {
550  bool MadeChange = false;
551 
552  assert(Bucket.Elements.size() ==
553  Bucket.ChainBases.size() * Bucket.ChainSize &&
554  "invalid bucket for chain commoning!\n");
555  SmallPtrSet<Value *, 16> DeletedPtrs;
556 
557  BasicBlock *Header = L->getHeader();
558  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
559 
560  SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(),
561  "loopprepare-chaincommon");
562 
563  for (unsigned ChainIdx = 0; ChainIdx < Bucket.ChainBases.size(); ++ChainIdx) {
564  unsigned BaseElemIdx = Bucket.ChainSize * ChainIdx;
565  const SCEV *BaseSCEV =
566  ChainIdx ? SE->getAddExpr(Bucket.BaseSCEV,
567  Bucket.Elements[BaseElemIdx].Offset)
568  : Bucket.BaseSCEV;
569  const SCEVAddRecExpr *BasePtrSCEV = cast<SCEVAddRecExpr>(BaseSCEV);
570 
571  // Make sure the base is able to expand.
572  if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE))
573  return MadeChange;
574 
575  assert(BasePtrSCEV->isAffine() &&
576  "Invalid SCEV type for the base ptr for a candidate chain!\n");
577 
578  std::pair<Instruction *, Instruction *> Base = rewriteForBase(
579  L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr,
580  false /* CanPreInc */, ChainCommoning, SCEVE, DeletedPtrs);
581 
582  if (!Base.first || !Base.second)
583  return MadeChange;
584 
585  // Keep track of the replacement pointer values we've inserted so that we
586  // don't generate more pointer values than necessary.
587  SmallPtrSet<Value *, 16> NewPtrs;
588  NewPtrs.insert(Base.first);
589 
590  for (unsigned Idx = BaseElemIdx + 1; Idx < BaseElemIdx + Bucket.ChainSize;
591  ++Idx) {
592  BucketElement &I = Bucket.Elements[Idx];
593  Value *Ptr = getPointerOperandAndType(I.Instr);
594  assert(Ptr && "No pointer operand");
595  if (NewPtrs.count(Ptr))
596  continue;
597 
598  const SCEV *OffsetSCEV =
599  BaseElemIdx ? SE->getMinusSCEV(Bucket.Elements[Idx].Offset,
600  Bucket.Elements[BaseElemIdx].Offset)
601  : Bucket.Elements[Idx].Offset;
602 
603  // Make sure offset is able to expand. Only need to check one time as the
604  // offsets are reused between different chains.
605  if (!BaseElemIdx)
606  if (!isSafeToExpand(OffsetSCEV, *SE))
607  return false;
608 
609  Value *OffsetValue = SCEVE.expandCodeFor(
610  OffsetSCEV, OffsetSCEV->getType(), LoopPredecessor->getTerminator());
611 
612  Instruction *NewPtr = rewriteForBucketElement(Base, Bucket.Elements[Idx],
613  OffsetValue, DeletedPtrs);
614 
615  assert(NewPtr && "Wrong rewrite!\n");
616  NewPtrs.insert(NewPtr);
617  }
618 
619  ++ChainCommoningRewritten;
620  }
621 
622  // Clear the rewriter cache, because values that are in the rewriter's cache
623  // can be deleted below, causing the AssertingVH in the cache to trigger.
624  SCEVE.clear();
625 
626  for (auto *Ptr : DeletedPtrs) {
627  if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
628  BBChanged.insert(IDel->getParent());
630  }
631 
632  MadeChange = true;
633  return MadeChange;
634 }
635 
636 // Rewrite the new base according to BasePtrSCEV.
637 // bb.loop.preheader:
638 // %newstart = ...
639 // bb.loop.body:
640 // %phinode = phi [ %newstart, %bb.loop.preheader ], [ %add, %bb.loop.body ]
641 // ...
642 // %add = getelementptr %phinode, %inc
643 //
644 // First returned instruciton is %phinode (or a type cast to %phinode), caller
645 // needs this value to rewrite other load/stores in the same chain.
646 // Second returned instruction is %add, caller needs this value to rewrite other
647 // load/stores in the same chain.
648 std::pair<Instruction *, Instruction *>
649 PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
650  Instruction *BaseMemI, bool CanPreInc,
651  PrepForm Form, SCEVExpander &SCEVE,
652  SmallPtrSet<Value *, 16> &DeletedPtrs) {
653 
654  LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
655 
656  assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?");
657 
659  assert(BasePtr && "No pointer operand");
660 
661  Type *I8Ty = Type::getInt8Ty(BaseMemI->getParent()->getContext());
662  Type *I8PtrTy =
663  Type::getInt8PtrTy(BaseMemI->getParent()->getContext(),
664  BasePtr->getType()->getPointerAddressSpace());
665 
666  bool IsConstantInc = false;
667  const SCEV *BasePtrIncSCEV = BasePtrSCEV->getStepRecurrence(*SE);
668  Value *IncNode = getNodeForInc(L, BaseMemI, BasePtrIncSCEV);
669 
670  const SCEVConstant *BasePtrIncConstantSCEV =
671  dyn_cast<SCEVConstant>(BasePtrIncSCEV);
672  if (BasePtrIncConstantSCEV)
673  IsConstantInc = true;
674 
675  // No valid representation for the increment.
676  if (!IncNode) {
677  LLVM_DEBUG(dbgs() << "Loop Increasement can not be represented!\n");
678  return std::make_pair(nullptr, nullptr);
679  }
680 
681  if (Form == UpdateForm && !IsConstantInc && !EnableUpdateFormForNonConstInc) {
682  LLVM_DEBUG(
683  dbgs()
684  << "Update form prepare for non-const increment is not enabled!\n");
685  return std::make_pair(nullptr, nullptr);
686  }
687 
688  const SCEV *BasePtrStartSCEV = nullptr;
689  if (CanPreInc) {
690  assert(SE->isLoopInvariant(BasePtrIncSCEV, L) &&
691  "Increment is not loop invariant!\n");
692  BasePtrStartSCEV = SE->getMinusSCEV(BasePtrSCEV->getStart(),
693  IsConstantInc ? BasePtrIncConstantSCEV
694  : BasePtrIncSCEV);
695  } else
696  BasePtrStartSCEV = BasePtrSCEV->getStart();
697 
698  if (alreadyPrepared(L, BaseMemI, BasePtrStartSCEV, BasePtrIncSCEV, Form)) {
699  LLVM_DEBUG(dbgs() << "Instruction form is already prepared!\n");
700  return std::make_pair(nullptr, nullptr);
701  }
702 
703  LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
704 
705  BasicBlock *Header = L->getHeader();
706  unsigned HeaderLoopPredCount = pred_size(Header);
707  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
708 
709  PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
710  getInstrName(BaseMemI, PHINodeNameSuffix),
711  Header->getFirstNonPHI());
712 
713  Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
714  LoopPredecessor->getTerminator());
715 
716  // Note that LoopPredecessor might occur in the predecessor list multiple
717  // times, and we need to add it the right number of times.
718  for (auto PI : predecessors(Header)) {
719  if (PI != LoopPredecessor)
720  continue;
721 
722  NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
723  }
724 
725  Instruction *PtrInc = nullptr;
726  Instruction *NewBasePtr = nullptr;
727  if (CanPreInc) {
728  Instruction *InsPoint = &*Header->getFirstInsertionPt();
729  PtrInc = GetElementPtrInst::Create(
730  I8Ty, NewPHI, IncNode, getInstrName(BaseMemI, GEPNodeIncNameSuffix),
731  InsPoint);
732  cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
733  for (auto PI : predecessors(Header)) {
734  if (PI == LoopPredecessor)
735  continue;
736 
737  NewPHI->addIncoming(PtrInc, PI);
738  }
739  if (PtrInc->getType() != BasePtr->getType())
740  NewBasePtr =
741  new BitCastInst(PtrInc, BasePtr->getType(),
742  getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
743  else
744  NewBasePtr = PtrInc;
745  } else {
746  // Note that LoopPredecessor might occur in the predecessor list multiple
747  // times, and we need to make sure no more incoming value for them in PHI.
748  for (auto PI : predecessors(Header)) {
749  if (PI == LoopPredecessor)
750  continue;
751 
752  // For the latch predecessor, we need to insert a GEP just before the
753  // terminator to increase the address.
754  BasicBlock *BB = PI;
755  Instruction *InsPoint = BB->getTerminator();
756  PtrInc = GetElementPtrInst::Create(
757  I8Ty, NewPHI, IncNode, getInstrName(BaseMemI, GEPNodeIncNameSuffix),
758  InsPoint);
759  cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
760 
761  NewPHI->addIncoming(PtrInc, PI);
762  }
763  PtrInc = NewPHI;
764  if (NewPHI->getType() != BasePtr->getType())
765  NewBasePtr = new BitCastInst(NewPHI, BasePtr->getType(),
767  &*Header->getFirstInsertionPt());
768  else
769  NewBasePtr = NewPHI;
770  }
771 
772  BasePtr->replaceAllUsesWith(NewBasePtr);
773 
774  DeletedPtrs.insert(BasePtr);
775 
776  return std::make_pair(NewBasePtr, PtrInc);
777 }
778 
779 Instruction *PPCLoopInstrFormPrep::rewriteForBucketElement(
780  std::pair<Instruction *, Instruction *> Base, const BucketElement &Element,
781  Value *OffToBase, SmallPtrSet<Value *, 16> &DeletedPtrs) {
782  Instruction *NewBasePtr = Base.first;
783  Instruction *PtrInc = Base.second;
784  assert((NewBasePtr && PtrInc) && "base does not exist!\n");
785 
786  Type *I8Ty = Type::getInt8Ty(PtrInc->getParent()->getContext());
787 
788  Value *Ptr = getPointerOperandAndType(Element.Instr);
789  assert(Ptr && "No pointer operand");
790 
791  Instruction *RealNewPtr;
792  if (!Element.Offset ||
793  (isa<SCEVConstant>(Element.Offset) &&
794  cast<SCEVConstant>(Element.Offset)->getValue()->isZero())) {
795  RealNewPtr = NewBasePtr;
796  } else {
797  Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
798  if (PtrIP && isa<Instruction>(NewBasePtr) &&
799  cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
800  PtrIP = nullptr;
801  else if (PtrIP && isa<PHINode>(PtrIP))
802  PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
803  else if (!PtrIP)
804  PtrIP = Element.Instr;
805 
806  assert(OffToBase && "There should be an offset for non base element!\n");
808  I8Ty, PtrInc, OffToBase,
809  getInstrName(Element.Instr, GEPNodeOffNameSuffix), PtrIP);
810  if (!PtrIP)
811  NewPtr->insertAfter(cast<Instruction>(PtrInc));
812  NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
813  RealNewPtr = NewPtr;
814  }
815 
816  Instruction *ReplNewPtr;
817  if (Ptr->getType() != RealNewPtr->getType()) {
818  ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
820  ReplNewPtr->insertAfter(RealNewPtr);
821  } else
822  ReplNewPtr = RealNewPtr;
823 
824  Ptr->replaceAllUsesWith(ReplNewPtr);
825  DeletedPtrs.insert(Ptr);
826 
827  return ReplNewPtr;
828 }
829 
830 void PPCLoopInstrFormPrep::addOneCandidate(
831  Instruction *MemI, const SCEV *LSCEV, SmallVector<Bucket, 16> &Buckets,
832  std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) {
833  assert((MemI && getPointerOperandAndType(MemI)) &&
834  "Candidate should be a memory instruction.");
835  assert(LSCEV && "Invalid SCEV for Ptr value.");
836 
837  bool FoundBucket = false;
838  for (auto &B : Buckets) {
839  if (cast<SCEVAddRecExpr>(B.BaseSCEV)->getStepRecurrence(*SE) !=
840  cast<SCEVAddRecExpr>(LSCEV)->getStepRecurrence(*SE))
841  continue;
842  const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
843  if (isValidDiff(Diff)) {
844  B.Elements.push_back(BucketElement(Diff, MemI));
845  FoundBucket = true;
846  break;
847  }
848  }
849 
850  if (!FoundBucket) {
851  if (Buckets.size() == MaxCandidateNum) {
852  LLVM_DEBUG(dbgs() << "Can not prepare more chains, reach maximum limit "
853  << MaxCandidateNum << "\n");
854  return;
855  }
856  Buckets.push_back(Bucket(LSCEV, MemI));
857  }
858 }
859 
860 SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
861  Loop *L,
862  std::function<bool(const Instruction *, Value *, const Type *)>
863  isValidCandidate,
864  std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) {
865  SmallVector<Bucket, 16> Buckets;
866 
867  for (const auto &BB : L->blocks())
868  for (auto &J : *BB) {
869  Value *PtrValue = nullptr;
870  Type *PointerElementType = nullptr;
871  PtrValue = getPointerOperandAndType(&J, &PointerElementType);
872 
873  if (!PtrValue)
874  continue;
875 
876  if (PtrValue->getType()->getPointerAddressSpace())
877  continue;
878 
879  if (L->isLoopInvariant(PtrValue))
880  continue;
881 
882  const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
883  const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);
884  if (!LARSCEV || LARSCEV->getLoop() != L)
885  continue;
886 
887  // Mark that we have candidates for preparing.
888  HasCandidateForPrepare = true;
889 
890  if (isValidCandidate(&J, PtrValue, PointerElementType))
891  addOneCandidate(&J, LSCEV, Buckets, isValidDiff, MaxCandidateNum);
892  }
893  return Buckets;
894 }
895 
896 bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
897  PrepForm Form) {
898  // RemainderOffsetInfo details:
899  // key: value of (Offset urem DispConstraint). For DSForm, it can
900  // be [0, 4).
901  // first of pair: the index of first BucketElement whose remainder is equal
902  // to key. For key 0, this value must be 0.
903  // second of pair: number of load/stores with the same remainder.
905 
906  for (unsigned j = 0, je = BucketChain.Elements.size(); j != je; ++j) {
907  if (!BucketChain.Elements[j].Offset)
908  RemainderOffsetInfo[0] = std::make_pair(0, 1);
909  else {
910  unsigned Remainder = cast<SCEVConstant>(BucketChain.Elements[j].Offset)
911  ->getAPInt()
912  .urem(Form);
913  if (RemainderOffsetInfo.find(Remainder) == RemainderOffsetInfo.end())
914  RemainderOffsetInfo[Remainder] = std::make_pair(j, 1);
915  else
916  RemainderOffsetInfo[Remainder].second++;
917  }
918  }
919  // Currently we choose the most profitable base as the one which has the max
920  // number of load/store with same remainder.
921  // FIXME: adjust the base selection strategy according to load/store offset
922  // distribution.
923  // For example, if we have one candidate chain for DS form preparation, which
924  // contains following load/stores with different remainders:
925  // 1: 10 load/store whose remainder is 1;
926  // 2: 9 load/store whose remainder is 2;
927  // 3: 1 for remainder 3 and 0 for remainder 0;
928  // Now we will choose the first load/store whose remainder is 1 as base and
929  // adjust all other load/stores according to new base, so we will get 10 DS
930  // form and 10 X form.
931  // But we should be more clever, for this case we could use two bases, one for
932  // remainder 1 and the other for remainder 2, thus we could get 19 DS form and
933  // 1 X form.
934  unsigned MaxCountRemainder = 0;
935  for (unsigned j = 0; j < (unsigned)Form; j++)
936  if ((RemainderOffsetInfo.find(j) != RemainderOffsetInfo.end()) &&
937  RemainderOffsetInfo[j].second >
938  RemainderOffsetInfo[MaxCountRemainder].second)
939  MaxCountRemainder = j;
940 
941  // Abort when there are too few insts with common base.
942  if (RemainderOffsetInfo[MaxCountRemainder].second < DispFormPrepMinThreshold)
943  return false;
944 
945  // If the first value is most profitable, no needed to adjust BucketChain
946  // elements as they are substracted the first value when collecting.
947  if (MaxCountRemainder == 0)
948  return true;
949 
950  // Adjust load/store to the new chosen base.
951  const SCEV *Offset =
952  BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first].Offset;
953  BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);
954  for (auto &E : BucketChain.Elements) {
955  if (E.Offset)
956  E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
957  else
958  E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
959  }
960 
961  std::swap(BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first],
962  BucketChain.Elements[0]);
963  return true;
964 }
965 
966 // FIXME: implement a more clever base choosing policy.
967 // Currently we always choose an exist load/store offset. This maybe lead to
968 // suboptimal code sequences. For example, for one DS chain with offsets
969 // {-32769, 2003, 2007, 2011}, we choose -32769 as base offset, and left disp
970 // for load/stores are {0, 34772, 34776, 34780}. Though each offset now is a
971 // multipler of 4, it cannot be represented by sint16.
972 bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
973  // We have a choice now of which instruction's memory operand we use as the
974  // base for the generated PHI. Always picking the first instruction in each
975  // bucket does not work well, specifically because that instruction might
976  // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
977  // the choice is somewhat arbitrary, because the backend will happily
978  // generate direct offsets from both the pre-incremented and
979  // post-incremented pointer values. Thus, we'll pick the first non-prefetch
980  // instruction in each bucket, and adjust the recurrence and other offsets
981  // accordingly.
982  for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) {
983  if (auto *II = dyn_cast<IntrinsicInst>(BucketChain.Elements[j].Instr))
984  if (II->getIntrinsicID() == Intrinsic::prefetch)
985  continue;
986 
987  // If we'd otherwise pick the first element anyway, there's nothing to do.
988  if (j == 0)
989  break;
990 
991  // If our chosen element has no offset from the base pointer, there's
992  // nothing to do.
993  if (!BucketChain.Elements[j].Offset ||
994  cast<SCEVConstant>(BucketChain.Elements[j].Offset)->isZero())
995  break;
996 
997  const SCEV *Offset = BucketChain.Elements[j].Offset;
998  BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);
999  for (auto &E : BucketChain.Elements) {
1000  if (E.Offset)
1001  E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
1002  else
1003  E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
1004  }
1005 
1006  std::swap(BucketChain.Elements[j], BucketChain.Elements[0]);
1007  break;
1008  }
1009  return true;
1010 }
1011 
1012 bool PPCLoopInstrFormPrep::rewriteLoadStores(
1013  Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged,
1014  PrepForm Form) {
1015  bool MadeChange = false;
1016 
1017  const SCEVAddRecExpr *BasePtrSCEV =
1018  cast<SCEVAddRecExpr>(BucketChain.BaseSCEV);
1019  if (!BasePtrSCEV->isAffine())
1020  return MadeChange;
1021 
1022  if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE))
1023  return MadeChange;
1024 
1025  SmallPtrSet<Value *, 16> DeletedPtrs;
1026 
1027  BasicBlock *Header = L->getHeader();
1028  SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(),
1029  "loopprepare-formrewrite");
1030 
1031  // For some DS form load/store instructions, it can also be an update form,
1032  // if the stride is constant and is a multipler of 4. Use update form if
1033  // prefer it.
1034  bool CanPreInc = (Form == UpdateForm ||
1035  ((Form == DSForm) &&
1036  isa<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE)) &&
1037  !cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE))
1038  ->getAPInt()
1039  .urem(4) &&
1040  PreferUpdateForm));
1041 
1042  std::pair<Instruction *, Instruction *> Base =
1043  rewriteForBase(L, BasePtrSCEV, BucketChain.Elements.begin()->Instr,
1044  CanPreInc, Form, SCEVE, DeletedPtrs);
1045 
1046  if (!Base.first || !Base.second)
1047  return MadeChange;
1048 
1049  // Keep track of the replacement pointer values we've inserted so that we
1050  // don't generate more pointer values than necessary.
1051  SmallPtrSet<Value *, 16> NewPtrs;
1052  NewPtrs.insert(Base.first);
1053 
1054  for (auto I = std::next(BucketChain.Elements.begin()),
1055  IE = BucketChain.Elements.end(); I != IE; ++I) {
1056  Value *Ptr = getPointerOperandAndType(I->Instr);
1057  assert(Ptr && "No pointer operand");
1058  if (NewPtrs.count(Ptr))
1059  continue;
1060 
1061  Instruction *NewPtr = rewriteForBucketElement(
1062  Base, *I,
1063  I->Offset ? cast<SCEVConstant>(I->Offset)->getValue() : nullptr,
1064  DeletedPtrs);
1065  assert(NewPtr && "wrong rewrite!\n");
1066  NewPtrs.insert(NewPtr);
1067  }
1068 
1069  // Clear the rewriter cache, because values that are in the rewriter's cache
1070  // can be deleted below, causing the AssertingVH in the cache to trigger.
1071  SCEVE.clear();
1072 
1073  for (auto *Ptr : DeletedPtrs) {
1074  if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
1075  BBChanged.insert(IDel->getParent());
1077  }
1078 
1079  MadeChange = true;
1080 
1081  SuccPrepCount++;
1082 
1083  if (Form == DSForm && !CanPreInc)
1084  DSFormChainRewritten++;
1085  else if (Form == DQForm)
1086  DQFormChainRewritten++;
1087  else if (Form == UpdateForm || (Form == DSForm && CanPreInc))
1088  UpdFormChainRewritten++;
1089 
1090  return MadeChange;
1091 }
1092 
1093 bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L,
1094  SmallVector<Bucket, 16> &Buckets) {
1095  bool MadeChange = false;
1096  if (Buckets.empty())
1097  return MadeChange;
1098  SmallSet<BasicBlock *, 16> BBChanged;
1099  for (auto &Bucket : Buckets)
1100  // The base address of each bucket is transformed into a phi and the others
1101  // are rewritten based on new base.
1102  if (prepareBaseForUpdateFormChain(Bucket))
1103  MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, UpdateForm);
1104 
1105  if (MadeChange)
1106  for (auto *BB : BBChanged)
1107  DeleteDeadPHIs(BB);
1108  return MadeChange;
1109 }
1110 
1111 bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L,
1112  SmallVector<Bucket, 16> &Buckets,
1113  PrepForm Form) {
1114  bool MadeChange = false;
1115 
1116  if (Buckets.empty())
1117  return MadeChange;
1118 
1119  SmallSet<BasicBlock *, 16> BBChanged;
1120  for (auto &Bucket : Buckets) {
1121  if (Bucket.Elements.size() < DispFormPrepMinThreshold)
1122  continue;
1123  if (prepareBaseForDispFormChain(Bucket, Form))
1124  MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, Form);
1125  }
1126 
1127  if (MadeChange)
1128  for (auto *BB : BBChanged)
1129  DeleteDeadPHIs(BB);
1130  return MadeChange;
1131 }
1132 
1133 // Find the loop invariant increment node for SCEV BasePtrIncSCEV.
1134 // bb.loop.preheader:
1135 // %start = ...
1136 // bb.loop.body:
1137 // %phinode = phi [ %start, %bb.loop.preheader ], [ %add, %bb.loop.body ]
1138 // ...
1139 // %add = add %phinode, %inc ; %inc is what we want to get.
1140 //
1141 Value *PPCLoopInstrFormPrep::getNodeForInc(Loop *L, Instruction *MemI,
1142  const SCEV *BasePtrIncSCEV) {
1143  // If the increment is a constant, no definition is needed.
1144  // Return the value directly.
1145  if (isa<SCEVConstant>(BasePtrIncSCEV))
1146  return cast<SCEVConstant>(BasePtrIncSCEV)->getValue();
1147 
1148  if (!SE->isLoopInvariant(BasePtrIncSCEV, L))
1149  return nullptr;
1150 
1151  BasicBlock *BB = MemI->getParent();
1152  if (!BB)
1153  return nullptr;
1154 
1155  BasicBlock *LatchBB = L->getLoopLatch();
1156 
1157  if (!LatchBB)
1158  return nullptr;
1159 
1160  // Run through the PHIs and check their operands to find valid representation
1161  // for the increment SCEV.
1162  iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
1163  for (auto &CurrentPHI : PHIIter) {
1164  PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
1165  if (!CurrentPHINode)
1166  continue;
1167 
1168  if (!SE->isSCEVable(CurrentPHINode->getType()))
1169  continue;
1170 
1171  const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);
1172 
1173  const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
1174  if (!PHIBasePtrSCEV)
1175  continue;
1176 
1177  const SCEV *PHIBasePtrIncSCEV = PHIBasePtrSCEV->getStepRecurrence(*SE);
1178 
1179  if (!PHIBasePtrIncSCEV || (PHIBasePtrIncSCEV != BasePtrIncSCEV))
1180  continue;
1181 
1182  // Get the incoming value from the loop latch and check if the value has
1183  // the add form with the required increment.
1184  if (Instruction *I = dyn_cast<Instruction>(
1185  CurrentPHINode->getIncomingValueForBlock(LatchBB))) {
1186  Value *StrippedBaseI = I;
1187  while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBaseI))
1188  StrippedBaseI = BC->getOperand(0);
1189 
1190  Instruction *StrippedI = dyn_cast<Instruction>(StrippedBaseI);
1191  if (!StrippedI)
1192  continue;
1193 
1194  // LSR pass may add a getelementptr instruction to do the loop increment,
1195  // also search in that getelementptr instruction.
1196  if (StrippedI->getOpcode() == Instruction::Add ||
1197  (StrippedI->getOpcode() == Instruction::GetElementPtr &&
1198  StrippedI->getNumOperands() == 2)) {
1199  if (SE->getSCEVAtScope(StrippedI->getOperand(0), L) == BasePtrIncSCEV)
1200  return StrippedI->getOperand(0);
1201  if (SE->getSCEVAtScope(StrippedI->getOperand(1), L) == BasePtrIncSCEV)
1202  return StrippedI->getOperand(1);
1203  }
1204  }
1205  }
1206  return nullptr;
1207 }
1208 
1209 // In order to prepare for the preferred instruction form, a PHI is added.
1210 // This function will check to see if that PHI already exists and will return
1211 // true if it found an existing PHI with the matched start and increment as the
1212 // one we wanted to create.
1213 bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,
1214  const SCEV *BasePtrStartSCEV,
1215  const SCEV *BasePtrIncSCEV,
1216  PrepForm Form) {
1217  BasicBlock *BB = MemI->getParent();
1218  if (!BB)
1219  return false;
1220 
1221  BasicBlock *PredBB = L->getLoopPredecessor();
1222  BasicBlock *LatchBB = L->getLoopLatch();
1223 
1224  if (!PredBB || !LatchBB)
1225  return false;
1226 
1227  // Run through the PHIs and see if we have some that looks like a preparation
1228  iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
1229  for (auto & CurrentPHI : PHIIter) {
1230  PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
1231  if (!CurrentPHINode)
1232  continue;
1233 
1234  if (!SE->isSCEVable(CurrentPHINode->getType()))
1235  continue;
1236 
1237  const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);
1238 
1239  const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
1240  if (!PHIBasePtrSCEV)
1241  continue;
1242 
1243  const SCEVConstant *PHIBasePtrIncSCEV =
1244  dyn_cast<SCEVConstant>(PHIBasePtrSCEV->getStepRecurrence(*SE));
1245  if (!PHIBasePtrIncSCEV)
1246  continue;
1247 
1248  if (CurrentPHINode->getNumIncomingValues() == 2) {
1249  if ((CurrentPHINode->getIncomingBlock(0) == LatchBB &&
1250  CurrentPHINode->getIncomingBlock(1) == PredBB) ||
1251  (CurrentPHINode->getIncomingBlock(1) == LatchBB &&
1252  CurrentPHINode->getIncomingBlock(0) == PredBB)) {
1253  if (PHIBasePtrIncSCEV == BasePtrIncSCEV) {
1254  // The existing PHI (CurrentPHINode) has the same start and increment
1255  // as the PHI that we wanted to create.
1256  if ((Form == UpdateForm || Form == ChainCommoning ) &&
1257  PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) {
1258  ++PHINodeAlreadyExistsUpdate;
1259  return true;
1260  }
1261  if (Form == DSForm || Form == DQForm) {
1262  const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
1263  SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV));
1264  if (Diff && !Diff->getAPInt().urem(Form)) {
1265  if (Form == DSForm)
1266  ++PHINodeAlreadyExistsDS;
1267  else
1268  ++PHINodeAlreadyExistsDQ;
1269  return true;
1270  }
1271  }
1272  }
1273  }
1274  }
1275  }
1276  return false;
1277 }
1278 
1279 bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
1280  bool MadeChange = false;
1281 
1282  // Only prep. the inner-most loop
1283  if (!L->isInnermost())
1284  return MadeChange;
1285 
1286  // Return if already done enough preparation.
1287  if (SuccPrepCount >= MaxVarsPrep)
1288  return MadeChange;
1289 
1290  LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
1291 
1292  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
1293  // If there is no loop predecessor, or the loop predecessor's terminator
1294  // returns a value (which might contribute to determining the loop's
1295  // iteration space), insert a new preheader for the loop.
1296  if (!LoopPredecessor ||
1297  !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
1298  LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
1299  if (LoopPredecessor)
1300  MadeChange = true;
1301  }
1302  if (!LoopPredecessor) {
1303  LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n");
1304  return MadeChange;
1305  }
1306  // Check if a load/store has update form. This lambda is used by function
1307  // collectCandidates which can collect candidates for types defined by lambda.
1308  auto isUpdateFormCandidate = [&](const Instruction *I, Value *PtrValue,
1309  const Type *PointerElementType) {
1310  assert((PtrValue && I) && "Invalid parameter!");
1311  // There are no update forms for Altivec vector load/stores.
1312  if (ST && ST->hasAltivec() && PointerElementType->isVectorTy())
1313  return false;
1314  // There are no update forms for P10 lxvp/stxvp intrinsic.
1315  auto *II = dyn_cast<IntrinsicInst>(I);
1316  if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) ||
1317  II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp))
1318  return false;
1319  // See getPreIndexedAddressParts, the displacement for LDU/STDU has to
1320  // be 4's multiple (DS-form). For i64 loads/stores when the displacement
1321  // fits in a 16-bit signed field but isn't a multiple of 4, it will be
1322  // useless and possible to break some original well-form addressing mode
1323  // to make this pre-inc prep for it.
1324  if (PointerElementType->isIntegerTy(64)) {
1325  const SCEV *LSCEV = SE->getSCEVAtScope(const_cast<Value *>(PtrValue), L);
1326  const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);
1327  if (!LARSCEV || LARSCEV->getLoop() != L)
1328  return false;
1329  if (const SCEVConstant *StepConst =
1330  dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) {
1331  const APInt &ConstInt = StepConst->getValue()->getValue();
1332  if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0)
1333  return false;
1334  }
1335  }
1336  return true;
1337  };
1338 
1339  // Check if a load/store has DS form.
1340  auto isDSFormCandidate = [](const Instruction *I, Value *PtrValue,
1341  const Type *PointerElementType) {
1342  assert((PtrValue && I) && "Invalid parameter!");
1343  if (isa<IntrinsicInst>(I))
1344  return false;
1345  return (PointerElementType->isIntegerTy(64)) ||
1346  (PointerElementType->isFloatTy()) ||
1347  (PointerElementType->isDoubleTy()) ||
1348  (PointerElementType->isIntegerTy(32) &&
1349  llvm::any_of(I->users(),
1350  [](const User *U) { return isa<SExtInst>(U); }));
1351  };
1352 
1353  // Check if a load/store has DQ form.
1354  auto isDQFormCandidate = [&](const Instruction *I, Value *PtrValue,
1355  const Type *PointerElementType) {
1356  assert((PtrValue && I) && "Invalid parameter!");
1357  // Check if it is a P10 lxvp/stxvp intrinsic.
1358  auto *II = dyn_cast<IntrinsicInst>(I);
1359  if (II)
1360  return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp ||
1361  II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp;
1362  // Check if it is a P9 vector load/store.
1363  return ST && ST->hasP9Vector() && (PointerElementType->isVectorTy());
1364  };
1365 
1366  // Check if a load/store is candidate for chain commoning.
1367  // If the SCEV is only with one ptr operand in its start, we can use that
1368  // start as a chain separator. Mark this load/store as a candidate.
1369  auto isChainCommoningCandidate = [&](const Instruction *I, Value *PtrValue,
1370  const Type *PointerElementType) {
1371  const SCEVAddRecExpr *ARSCEV =
1372  cast<SCEVAddRecExpr>(SE->getSCEVAtScope(PtrValue, L));
1373  if (!ARSCEV)
1374  return false;
1375 
1376  if (!ARSCEV->isAffine())
1377  return false;
1378 
1379  const SCEV *Start = ARSCEV->getStart();
1380 
1381  // A single pointer. We can treat it as offset 0.
1382  if (isa<SCEVUnknown>(Start) && Start->getType()->isPointerTy())
1383  return true;
1384 
1385  const SCEVAddExpr *ASCEV = dyn_cast<SCEVAddExpr>(Start);
1386 
1387  // We need a SCEVAddExpr to include both base and offset.
1388  if (!ASCEV)
1389  return false;
1390 
1391  // Make sure there is only one pointer operand(base) and all other operands
1392  // are integer type.
1393  bool SawPointer = false;
1394  for (const SCEV *Op : ASCEV->operands()) {
1395  if (Op->getType()->isPointerTy()) {
1396  if (SawPointer)
1397  return false;
1398  SawPointer = true;
1399  } else if (!Op->getType()->isIntegerTy())
1400  return false;
1401  }
1402 
1403  return SawPointer;
1404  };
1405 
1406  // Check if the diff is a constant type. This is used for update/DS/DQ form
1407  // preparation.
1408  auto isValidConstantDiff = [](const SCEV *Diff) {
1409  return dyn_cast<SCEVConstant>(Diff) != nullptr;
1410  };
1411 
1412  // Make sure the diff between the base and new candidate is required type.
1413  // This is used for chain commoning preparation.
1414  auto isValidChainCommoningDiff = [](const SCEV *Diff) {
1415  assert(Diff && "Invalid Diff!\n");
1416 
1417  // Don't mess up previous dform prepare.
1418  if (isa<SCEVConstant>(Diff))
1419  return false;
1420 
1421  // A single integer type offset.
1422  if (isa<SCEVUnknown>(Diff) && Diff->getType()->isIntegerTy())
1423  return true;
1424 
1425  const SCEVNAryExpr *ADiff = dyn_cast<SCEVNAryExpr>(Diff);
1426  if (!ADiff)
1427  return false;
1428 
1429  for (const SCEV *Op : ADiff->operands())
1430  if (!Op->getType()->isIntegerTy())
1431  return false;
1432 
1433  return true;
1434  };
1435 
1436  HasCandidateForPrepare = false;
1437 
1438  LLVM_DEBUG(dbgs() << "Start to prepare for update form.\n");
1439  // Collect buckets of comparable addresses used by loads and stores for update
1440  // form.
1441  SmallVector<Bucket, 16> UpdateFormBuckets = collectCandidates(
1442  L, isUpdateFormCandidate, isValidConstantDiff, MaxVarsUpdateForm);
1443 
1444  // Prepare for update form.
1445  if (!UpdateFormBuckets.empty())
1446  MadeChange |= updateFormPrep(L, UpdateFormBuckets);
1447  else if (!HasCandidateForPrepare) {
1448  LLVM_DEBUG(
1449  dbgs()
1450  << "No prepare candidates found, stop praparation for current loop!\n");
1451  // If no candidate for preparing, return early.
1452  return MadeChange;
1453  }
1454 
1455  LLVM_DEBUG(dbgs() << "Start to prepare for DS form.\n");
1456  // Collect buckets of comparable addresses used by loads and stores for DS
1457  // form.
1458  SmallVector<Bucket, 16> DSFormBuckets = collectCandidates(
1459  L, isDSFormCandidate, isValidConstantDiff, MaxVarsDSForm);
1460 
1461  // Prepare for DS form.
1462  if (!DSFormBuckets.empty())
1463  MadeChange |= dispFormPrep(L, DSFormBuckets, DSForm);
1464 
1465  LLVM_DEBUG(dbgs() << "Start to prepare for DQ form.\n");
1466  // Collect buckets of comparable addresses used by loads and stores for DQ
1467  // form.
1468  SmallVector<Bucket, 16> DQFormBuckets = collectCandidates(
1469  L, isDQFormCandidate, isValidConstantDiff, MaxVarsDQForm);
1470 
1471  // Prepare for DQ form.
1472  if (!DQFormBuckets.empty())
1473  MadeChange |= dispFormPrep(L, DQFormBuckets, DQForm);
1474 
1475  // Collect buckets of comparable addresses used by loads and stores for chain
1476  // commoning. With chain commoning, we reuse offsets between the chains, so
1477  // the register pressure will be reduced.
1478  if (!EnableChainCommoning) {
1479  LLVM_DEBUG(dbgs() << "Chain commoning is not enabled.\n");
1480  return MadeChange;
1481  }
1482 
1483  LLVM_DEBUG(dbgs() << "Start to prepare for chain commoning.\n");
1484  SmallVector<Bucket, 16> Buckets =
1485  collectCandidates(L, isChainCommoningCandidate, isValidChainCommoningDiff,
1487 
1488  // Prepare for chain commoning.
1489  if (!Buckets.empty())
1490  MadeChange |= chainCommoning(L, Buckets);
1491 
1492  return MadeChange;
1493 }
i
i
Definition: README.txt:29
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:517
llvm::GetElementPtrInst::setIsInBounds
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Definition: Instructions.cpp:1828
MaxVarsChainCommon
static cl::opt< unsigned > MaxVarsChainCommon("ppc-chaincommon-max-vars", cl::Hidden, cl::init(4), cl::desc("Bucket number per loop for PPC loop chain common"))
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm::ScalarEvolution::getNegativeSCEV
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Definition: ScalarEvolution.cpp:4429
GEPNodeIncNameSuffix
static constexpr StringRef GEPNodeIncNameSuffix
Definition: PPCLoopInstrFormPrep.cpp:349
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::isSafeToExpand
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, bool CanonicalMode=true)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
Definition: ScalarEvolutionExpander.cpp:2626
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:370
IntrinsicInst.h
ScalarEvolutionExpander.h
llvm::APInt::isSignedIntN
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:420
MaxVarsDQForm
static cl::opt< unsigned > MaxVarsDQForm("ppc-dqprep-max-vars", cl::Hidden, cl::init(8), cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"))
Scalar.h
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:353
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
Pass.h
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
CastNodeNameSuffix
static constexpr StringRef CastNodeNameSuffix
Definition: PPCLoopInstrFormPrep.cpp:348
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5225
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::LoopBase::getLoopPredecessor
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
Definition: LoopInfoImpl.h:194
llvm::cast
decltype(auto) LLVM_NODISCARD cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:572
llvm::dwarf::Form
Form
Definition: Dwarf.h:132
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
Local.h
llvm::createPPCLoopInstrFormPrepPass
FunctionPass * createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM)
Definition: PPCLoopInstrFormPrep.cpp:352
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::SCEVExpander::expandCodeFor
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
Definition: ScalarEvolutionExpander.h:275
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
getPointerOperandAndType
static Value * getPointerOperandAndType(Value *MemI, Type **PtrElementType=nullptr)
Definition: PPCLoopInstrFormPrep.cpp:374
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1271
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
PHINodeNameSuffix
static constexpr StringRef PHINodeNameSuffix
Definition: PPCLoopInstrFormPrep.cpp:347
llvm::SmallPtrSet< Value *, 16 >
EnableChainCommoning
static cl::opt< bool > EnableChainCommoning("ppc-formprep-chain-commoning", cl::init(false), cl::Hidden, cl::desc("Enable chain commoning in PPC loop prepare pass."))
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
llvm::SCEVNAryExpr::operands
op_range operands() const
Definition: ScalarEvolutionExpressions.h:211
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::initializePPCLoopInstrFormPrepPass
void initializePPCLoopInstrFormPrepPass(PassRegistry &)
DepthFirstIterator.h
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
PPCSubtarget.h
CommandLine.h
llvm::Instruction::insertAfter
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Definition: Instruction.cpp:89
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:157
EnableUpdateFormForNonConstInc
static cl::opt< bool > EnableUpdateFormForNonConstInc("ppc-formprep-update-nonconst-inc", cl::init(false), cl::Hidden, cl::desc("prepare update form when the load/store increment is a loop " "invariant non-const value."))
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PPCSubtarget
Definition: PPCSubtarget.h:71
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
MaxVarsDSForm
static cl::opt< unsigned > MaxVarsDSForm("ppc-dsprep-max-vars", cl::Hidden, cl::init(3), cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form"))
llvm::User
Definition: User.h:44
name
static const char * name
Definition: PPCLoopInstrFormPrep.cpp:341
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
false
Definition: StackSlotColoring.cpp:141
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2849
llvm::Instruction
Definition: Instruction.h:42
MaxVarsUpdateForm
static cl::opt< unsigned > MaxVarsUpdateForm("ppc-preinc-prep-max-vars", cl::Hidden, cl::init(3), cl::desc("Potential PHI threshold per loop for PPC loop prep of update " "form"))
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
PPC.h
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
LoopUtils.h
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2145
SmallPtrSet.h
llvm::SCEVNAryExpr
This node is a base class providing common functionality for n'ary operators.
Definition: ScalarEvolutionExpressions.h:184
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:147
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:209
Utils.h
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2756
ChainCommonPrepMinThreshold
static cl::opt< unsigned > ChainCommonPrepMinThreshold("ppc-chaincommon-min-threshold", cl::Hidden, cl::init(4), cl::desc("Minimal common base load/store instructions triggering chain " "commoning preparation. Must be not smaller than 4"))
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
llvm::APInt::srem
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1752
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
LoopInfo.h
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:116
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:191
llvm::InsertPreheaderForLoop
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
Definition: LoopSimplify.cpp:118
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1392
DispFormPrepMinThreshold
static cl::opt< unsigned > DispFormPrepMinThreshold("ppc-dispprep-min-threshold", cl::Hidden, cl::init(2), cl::desc("Minimal common base load/store instructions triggering DS/DQ form " "preparation"))
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:239
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:305
inc
Current eax eax eax ret Ideal eax eax ret Re implement atomic builtins x86 does not have to use add to implement these it can use inc
Definition: README.txt:1367
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:405
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2814
llvm::DenseMap
Definition: DenseMap.h:716
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:929
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:60
GEPNodeOffNameSuffix
static constexpr StringRef GEPNodeOffNameSuffix
Definition: PPCLoopInstrFormPrep.cpp:350
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:215
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::SCEVExpander::clear
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
Definition: ScalarEvolutionExpander.h:199
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::APInt::urem
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1682
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
getInstrName
static std::string getInstrName(const Value *I, StringRef Suffix)
Definition: PPCLoopInstrFormPrep.cpp:366
prefetch
loop data prefetch
Definition: LoopDataPrefetch.cpp:149
llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:955
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1612
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::pred_size
unsigned pred_size(const BasicBlock *BB)
Get the number of predecessors of BB.
Definition: CFG.h:123
DEBUG_TYPE
#define DEBUG_TYPE
Definition: PPCLoopInstrFormPrep.cpp:114
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:529
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:868
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
PreferUpdateForm
static cl::opt< bool > PreferUpdateForm("ppc-formprep-prefer-update", cl::init(true), cl::Hidden, cl::desc("prefer update form when ds form is also a update form"))
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:176
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13204
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:230
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
j
return j(j<< 16)
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::LCSSAID
char & LCSSAID
Definition: LCSSA.cpp:486
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:354
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4515
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:148
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:337
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2706
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
Casting.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:4284
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:151
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::PPCTargetMachine
Common code between 32-bit and 64-bit PowerPC targets.
Definition: PPCTargetMachine.h:25
llvm::DeleteDeadPHIs
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition: BasicBlockUtils.cpp:162
llvm::SCEVConstant::getAPInt
const APInt & getAPInt() const
Definition: ScalarEvolutionExpressions.h:70
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
ScalarEvolutionExpressions.h
IsPtrInBounds
static bool IsPtrInBounds(Value *BasePtr)
Definition: PPCLoopInstrFormPrep.cpp:356
Instructions.h
llvm::numbers::phi
constexpr double phi
Definition: MathExtras.h:71
llvm::SCEVAddExpr
This node represents an addition of some number of SCEVs.
Definition: ScalarEvolutionExpressions.h:257
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
SmallVector.h
MaxVarsPrep
static cl::opt< unsigned > MaxVarsPrep("ppc-formprep-max-vars", cl::Hidden, cl::init(24), cl::ZeroOrMore, cl::desc("Potential common base number threshold per function " "for PPC loop prep"))
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2780
llvm::iterator_range
A range adaptor for a pair of iterators.
Definition: iterator_range.h:30
llvm::PHINode
Definition: Instructions.h:2664
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:393
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2446
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:405
BasicBlockUtils.h
Value.h
InitializePasses.h
llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:9211
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
PPCTargetMachine.h
llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition: ScalarEvolutionExpressions.h:360
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37