LLVM  10.0.0svn
LoopIdiomRecognize.cpp
Go to the documentation of this file.
1 //===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements an idiom recognizer that transforms simple loops into a
10 // non-loop form. In cases that this kicks in, it can be a significant
11 // performance win.
12 //
13 // If compiling for code size we avoid idiom recognition if the resulting
14 // code could be larger than the code for the original loop. One way this could
15 // happen is if the loop is not removable after idiom recognition due to the
16 // presence of non-idiom instructions. The initial implementation of the
17 // heuristics applies to idioms in multi-block loops.
18 //
19 //===----------------------------------------------------------------------===//
20 //
21 // TODO List:
22 //
23 // Future loop memory idioms to recognize:
24 // memcmp, memmove, strlen, etc.
25 // Future floating point idioms to recognize in -ffast-math mode:
26 // fpowi
27 // Future integer operation idioms to recognize:
28 // ctpop
29 //
30 // Beware that isel's default lowering for ctpop is highly inefficient for
31 // i64 and larger types when i64 is legal and the value has few bits set. It
32 // would be good to enhance isel to emit a loop for ctpop in this case.
33 //
34 // This could recognize common matrix multiplies and dot product idioms and
35 // replace them with calls to BLAS (if linked in??).
36 //
37 //===----------------------------------------------------------------------===//
38 
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>
99 using namespace llvm;
100 
101 #define DEBUG_TYPE "loop-idiom"
102 
103 STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
104 STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
105 
107  "use-lir-code-size-heurs",
108  cl::desc("Use loop idiom recognition code size heuristics when compiling"
109  "with -Os/-Oz"),
110  cl::init(true), cl::Hidden);
111 
112 namespace {
113 
114 class LoopIdiomRecognize {
115  Loop *CurLoop = nullptr;
116  AliasAnalysis *AA;
117  DominatorTree *DT;
118  LoopInfo *LI;
119  ScalarEvolution *SE;
120  TargetLibraryInfo *TLI;
121  const TargetTransformInfo *TTI;
122  const DataLayout *DL;
124  bool ApplyCodeSizeHeuristics;
125 
126 public:
127  explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
128  LoopInfo *LI, ScalarEvolution *SE,
129  TargetLibraryInfo *TLI,
130  const TargetTransformInfo *TTI,
131  const DataLayout *DL,
133  : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {}
134 
135  bool runOnLoop(Loop *L);
136 
137 private:
138  using StoreList = SmallVector<StoreInst *, 8>;
139  using StoreListMap = MapVector<Value *, StoreList>;
140 
141  StoreListMap StoreRefsForMemset;
142  StoreListMap StoreRefsForMemsetPattern;
143  StoreList StoreRefsForMemcpy;
144  bool HasMemset;
145  bool HasMemsetPattern;
146  bool HasMemcpy;
147 
148  /// Return code for isLegalStore()
149  enum LegalStoreKind {
150  None = 0,
151  Memset,
152  MemsetPattern,
153  Memcpy,
154  UnorderedAtomicMemcpy,
155  DontUse // Dummy retval never to be used. Allows catching errors in retval
156  // handling.
157  };
158 
159  /// \name Countable Loop Idiom Handling
160  /// @{
161 
162  bool runOnCountableLoop();
163  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
164  SmallVectorImpl<BasicBlock *> &ExitBlocks);
165 
166  void collectStores(BasicBlock *BB);
167  LegalStoreKind isLegalStore(StoreInst *SI);
168  enum class ForMemset { No, Yes };
169  bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
170  ForMemset For);
171  bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
172 
173  bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
174  unsigned StoreAlignment, Value *StoredVal,
175  Instruction *TheStore,
177  const SCEVAddRecExpr *Ev, const SCEV *BECount,
178  bool NegStride, bool IsLoopMemset = false);
179  bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
180  bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
181  bool IsLoopMemset = false);
182 
183  /// @}
184  /// \name Noncountable Loop Idiom Handling
185  /// @{
186 
187  bool runOnNoncountableLoop();
188 
189  bool recognizePopcount();
190  void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
191  PHINode *CntPhi, Value *Var);
192  bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
193  void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
194  Instruction *CntInst, PHINode *CntPhi,
195  Value *Var, Instruction *DefX,
196  const DebugLoc &DL, bool ZeroCheck,
197  bool IsCntPhiUsedOutsideLoop);
198 
199  /// @}
200 };
201 
202 class LoopIdiomRecognizeLegacyPass : public LoopPass {
203 public:
204  static char ID;
205 
206  explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
209  }
210 
211  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
212  if (skipLoop(L))
213  return false;
214 
215  AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
216  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
217  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
218  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
219  TargetLibraryInfo *TLI =
220  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
221  const TargetTransformInfo *TTI =
222  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
223  *L->getHeader()->getParent());
224  const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
225 
226  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
227  // pass. Function analyses need to be preserved across loop transformations
228  // but ORE cannot be preserved (see comment before the pass definition).
230 
231  LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE);
232  return LIR.runOnLoop(L);
233  }
234 
235  /// This transformation requires natural loop information & requires that
236  /// loop preheaders be inserted into the CFG.
237  void getAnalysisUsage(AnalysisUsage &AU) const override {
241  }
242 };
243 
244 } // end anonymous namespace
245 
247 
250  LPMUpdater &) {
251  const auto *DL = &L.getHeader()->getModule()->getDataLayout();
252 
253  const auto &FAM =
254  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
255  Function *F = L.getHeader()->getParent();
256 
257  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
258  // FIXME: This should probably be optional rather than required.
259  if (!ORE)
261  "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
262  "at a higher level");
263 
264  LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
265  *ORE);
266  if (!LIR.runOnLoop(&L))
267  return PreservedAnalyses::all();
268 
270 }
271 
272 INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
273  "Recognize loop idioms", false, false)
277 INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",
278  "Recognize loop idioms", false, false)
279 
280 Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
281 
284  I->eraseFromParent();
285 }
286 
287 //===----------------------------------------------------------------------===//
288 //
289 // Implementation of LoopIdiomRecognize
290 //
291 //===----------------------------------------------------------------------===//
292 
293 bool LoopIdiomRecognize::runOnLoop(Loop *L) {
294  CurLoop = L;
295  // If the loop could not be converted to canonical form, it must have an
296  // indirectbr in it, just give up.
297  if (!L->getLoopPreheader())
298  return false;
299 
300  // Disable loop idiom recognition if the function's name is a common idiom.
302  if (Name == "memset" || Name == "memcpy")
303  return false;
304 
305  // Determine if code size heuristics need to be applied.
306  ApplyCodeSizeHeuristics =
308 
309  HasMemset = TLI->has(LibFunc_memset);
310  HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
311  HasMemcpy = TLI->has(LibFunc_memcpy);
312 
313  if (HasMemset || HasMemsetPattern || HasMemcpy)
315  return runOnCountableLoop();
316 
317  return runOnNoncountableLoop();
318 }
319 
320 bool LoopIdiomRecognize::runOnCountableLoop() {
321  const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
322  assert(!isa<SCEVCouldNotCompute>(BECount) &&
323  "runOnCountableLoop() called on a loop without a predictable"
324  "backedge-taken count");
325 
326  // If this loop executes exactly one time, then it should be peeled, not
327  // optimized by this pass.
328  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
329  if (BECst->getAPInt() == 0)
330  return false;
331 
332  SmallVector<BasicBlock *, 8> ExitBlocks;
333  CurLoop->getUniqueExitBlocks(ExitBlocks);
334 
335  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
336  << CurLoop->getHeader()->getParent()->getName()
337  << "] Countable Loop %" << CurLoop->getHeader()->getName()
338  << "\n");
339 
340  bool MadeChange = false;
341 
342  // The following transforms hoist stores/memsets into the loop pre-header.
343  // Give up if the loop has instructions may throw.
344  SimpleLoopSafetyInfo SafetyInfo;
345  SafetyInfo.computeLoopSafetyInfo(CurLoop);
346  if (SafetyInfo.anyBlockMayThrow())
347  return MadeChange;
348 
349  // Scan all the blocks in the loop that are not in subloops.
350  for (auto *BB : CurLoop->getBlocks()) {
351  // Ignore blocks in subloops.
352  if (LI->getLoopFor(BB) != CurLoop)
353  continue;
354 
355  MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
356  }
357  return MadeChange;
358 }
359 
360 static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
361  const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
362  return ConstStride->getAPInt();
363 }
364 
365 /// getMemSetPatternValue - If a strided store of the specified value is safe to
366 /// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
367 /// be passed in. Otherwise, return null.
368 ///
369 /// Note that we don't ever attempt to use memset_pattern8 or 4, because these
370 /// just replicate their input array and then pass on to memset_pattern16.
372  // FIXME: This could check for UndefValue because it can be merged into any
373  // other valid pattern.
374 
375  // If the value isn't a constant, we can't promote it to being in a constant
376  // array. We could theoretically do a store to an alloca or something, but
377  // that doesn't seem worthwhile.
378  Constant *C = dyn_cast<Constant>(V);
379  if (!C)
380  return nullptr;
381 
382  // Only handle simple values that are a power of two bytes in size.
383  uint64_t Size = DL->getTypeSizeInBits(V->getType());
384  if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
385  return nullptr;
386 
387  // Don't care enough about darwin/ppc to implement this.
388  if (DL->isBigEndian())
389  return nullptr;
390 
391  // Convert to size in bytes.
392  Size /= 8;
393 
394  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
395  // if the top and bottom are the same (e.g. for vectors and large integers).
396  if (Size > 16)
397  return nullptr;
398 
399  // If the constant is exactly 16 bytes, just use it.
400  if (Size == 16)
401  return C;
402 
403  // Otherwise, we'll use an array of the constants.
404  unsigned ArraySize = 16 / Size;
405  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
406  return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
407 }
408 
409 LoopIdiomRecognize::LegalStoreKind
410 LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
411  // Don't touch volatile stores.
412  if (SI->isVolatile())
413  return LegalStoreKind::None;
414  // We only want simple or unordered-atomic stores.
415  if (!SI->isUnordered())
416  return LegalStoreKind::None;
417 
418  // Don't convert stores of non-integral pointer types to memsets (which stores
419  // integers).
420  if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
421  return LegalStoreKind::None;
422 
423  // Avoid merging nontemporal stores.
425  return LegalStoreKind::None;
426 
427  Value *StoredVal = SI->getValueOperand();
428  Value *StorePtr = SI->getPointerOperand();
429 
430  // Reject stores that are so large that they overflow an unsigned.
431  uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
432  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
433  return LegalStoreKind::None;
434 
435  // See if the pointer expression is an AddRec like {base,+,1} on the current
436  // loop, which indicates a strided store. If we have something else, it's a
437  // random store we can't handle.
438  const SCEVAddRecExpr *StoreEv =
439  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
440  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
441  return LegalStoreKind::None;
442 
443  // Check to see if we have a constant stride.
444  if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
445  return LegalStoreKind::None;
446 
447  // See if the store can be turned into a memset.
448 
449  // If the stored value is a byte-wise value (like i32 -1), then it may be
450  // turned into a memset of i8 -1, assuming that all the consecutive bytes
451  // are stored. A store of i32 0x01020304 can never be turned into a memset,
452  // but it can be turned into memset_pattern if the target supports it.
453  Value *SplatValue = isBytewiseValue(StoredVal, *DL);
454  Constant *PatternValue = nullptr;
455 
456  // Note: memset and memset_pattern on unordered-atomic is yet not supported
457  bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
458 
459  // If we're allowed to form a memset, and the stored value would be
460  // acceptable for memset, use it.
461  if (!UnorderedAtomic && HasMemset && SplatValue &&
462  // Verify that the stored value is loop invariant. If not, we can't
463  // promote the memset.
464  CurLoop->isLoopInvariant(SplatValue)) {
465  // It looks like we can use SplatValue.
466  return LegalStoreKind::Memset;
467  } else if (!UnorderedAtomic && HasMemsetPattern &&
468  // Don't create memset_pattern16s with address spaces.
469  StorePtr->getType()->getPointerAddressSpace() == 0 &&
470  (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
471  // It looks like we can use PatternValue!
472  return LegalStoreKind::MemsetPattern;
473  }
474 
475  // Otherwise, see if the store can be turned into a memcpy.
476  if (HasMemcpy) {
477  // Check to see if the stride matches the size of the store. If so, then we
478  // know that every byte is touched in the loop.
479  APInt Stride = getStoreStride(StoreEv);
480  unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
481  if (StoreSize != Stride && StoreSize != -Stride)
482  return LegalStoreKind::None;
483 
484  // The store must be feeding a non-volatile load.
486 
487  // Only allow non-volatile loads
488  if (!LI || LI->isVolatile())
489  return LegalStoreKind::None;
490  // Only allow simple or unordered-atomic loads
491  if (!LI->isUnordered())
492  return LegalStoreKind::None;
493 
494  // See if the pointer expression is an AddRec like {base,+,1} on the current
495  // loop, which indicates a strided load. If we have something else, it's a
496  // random load we can't handle.
497  const SCEVAddRecExpr *LoadEv =
499  if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
500  return LegalStoreKind::None;
501 
502  // The store and load must share the same stride.
503  if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
504  return LegalStoreKind::None;
505 
506  // Success. This store can be converted into a memcpy.
507  UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
508  return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
509  : LegalStoreKind::Memcpy;
510  }
511  // This store can't be transformed into a memset/memcpy.
512  return LegalStoreKind::None;
513 }
514 
515 void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
516  StoreRefsForMemset.clear();
517  StoreRefsForMemsetPattern.clear();
518  StoreRefsForMemcpy.clear();
519  for (Instruction &I : *BB) {
520  StoreInst *SI = dyn_cast<StoreInst>(&I);
521  if (!SI)
522  continue;
523 
524  // Make sure this is a strided store with a constant stride.
525  switch (isLegalStore(SI)) {
527  // Nothing to do
528  break;
529  case LegalStoreKind::Memset: {
530  // Find the base pointer.
531  Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
532  StoreRefsForMemset[Ptr].push_back(SI);
533  } break;
534  case LegalStoreKind::MemsetPattern: {
535  // Find the base pointer.
536  Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
537  StoreRefsForMemsetPattern[Ptr].push_back(SI);
538  } break;
539  case LegalStoreKind::Memcpy:
540  case LegalStoreKind::UnorderedAtomicMemcpy:
541  StoreRefsForMemcpy.push_back(SI);
542  break;
543  default:
544  assert(false && "unhandled return value");
545  break;
546  }
547  }
548 }
549 
550 /// runOnLoopBlock - Process the specified block, which lives in a counted loop
551 /// with the specified backedge count. This block is known to be in the current
552 /// loop and not in any subloops.
553 bool LoopIdiomRecognize::runOnLoopBlock(
554  BasicBlock *BB, const SCEV *BECount,
555  SmallVectorImpl<BasicBlock *> &ExitBlocks) {
556  // We can only promote stores in this block if they are unconditionally
557  // executed in the loop. For a block to be unconditionally executed, it has
558  // to dominate all the exit blocks of the loop. Verify this now.
559  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
560  if (!DT->dominates(BB, ExitBlocks[i]))
561  return false;
562 
563  bool MadeChange = false;
564  // Look for store instructions, which may be optimized to memset/memcpy.
565  collectStores(BB);
566 
567  // Look for a single store or sets of stores with a common base, which can be
568  // optimized into a memset (memset_pattern). The latter most commonly happens
569  // with structs and handunrolled loops.
570  for (auto &SL : StoreRefsForMemset)
571  MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
572 
573  for (auto &SL : StoreRefsForMemsetPattern)
574  MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
575 
576  // Optimize the store into a memcpy, if it feeds an similarly strided load.
577  for (auto &SI : StoreRefsForMemcpy)
578  MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
579 
580  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
581  Instruction *Inst = &*I++;
582  // Look for memset instructions, which may be optimized to a larger memset.
583  if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
584  WeakTrackingVH InstPtr(&*I);
585  if (!processLoopMemSet(MSI, BECount))
586  continue;
587  MadeChange = true;
588 
589  // If processing the memset invalidated our iterator, start over from the
590  // top of the block.
591  if (!InstPtr)
592  I = BB->begin();
593  continue;
594  }
595  }
596 
597  return MadeChange;
598 }
599 
600 /// See if this store(s) can be promoted to a memset.
601 bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
602  const SCEV *BECount, ForMemset For) {
603  // Try to find consecutive stores that can be transformed into memsets.
604  SetVector<StoreInst *> Heads, Tails;
606 
607  // Do a quadratic search on all of the given stores and find
608  // all of the pairs of stores that follow each other.
609  SmallVector<unsigned, 16> IndexQueue;
610  for (unsigned i = 0, e = SL.size(); i < e; ++i) {
611  assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
612 
613  Value *FirstStoredVal = SL[i]->getValueOperand();
614  Value *FirstStorePtr = SL[i]->getPointerOperand();
615  const SCEVAddRecExpr *FirstStoreEv =
616  cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
617  APInt FirstStride = getStoreStride(FirstStoreEv);
618  unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());
619 
620  // See if we can optimize just this store in isolation.
621  if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
622  Heads.insert(SL[i]);
623  continue;
624  }
625 
626  Value *FirstSplatValue = nullptr;
627  Constant *FirstPatternValue = nullptr;
628 
629  if (For == ForMemset::Yes)
630  FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
631  else
632  FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
633 
634  assert((FirstSplatValue || FirstPatternValue) &&
635  "Expected either splat value or pattern value.");
636 
637  IndexQueue.clear();
638  // If a store has multiple consecutive store candidates, search Stores
639  // array according to the sequence: from i+1 to e, then from i-1 to 0.
640  // This is because usually pairing with immediate succeeding or preceding
641  // candidate create the best chance to find memset opportunity.
642  unsigned j = 0;
643  for (j = i + 1; j < e; ++j)
644  IndexQueue.push_back(j);
645  for (j = i; j > 0; --j)
646  IndexQueue.push_back(j - 1);
647 
648  for (auto &k : IndexQueue) {
649  assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
650  Value *SecondStorePtr = SL[k]->getPointerOperand();
651  const SCEVAddRecExpr *SecondStoreEv =
652  cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
653  APInt SecondStride = getStoreStride(SecondStoreEv);
654 
655  if (FirstStride != SecondStride)
656  continue;
657 
658  Value *SecondStoredVal = SL[k]->getValueOperand();
659  Value *SecondSplatValue = nullptr;
660  Constant *SecondPatternValue = nullptr;
661 
662  if (For == ForMemset::Yes)
663  SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
664  else
665  SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
666 
667  assert((SecondSplatValue || SecondPatternValue) &&
668  "Expected either splat value or pattern value.");
669 
670  if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
671  if (For == ForMemset::Yes) {
672  if (isa<UndefValue>(FirstSplatValue))
673  FirstSplatValue = SecondSplatValue;
674  if (FirstSplatValue != SecondSplatValue)
675  continue;
676  } else {
677  if (isa<UndefValue>(FirstPatternValue))
678  FirstPatternValue = SecondPatternValue;
679  if (FirstPatternValue != SecondPatternValue)
680  continue;
681  }
682  Tails.insert(SL[k]);
683  Heads.insert(SL[i]);
684  ConsecutiveChain[SL[i]] = SL[k];
685  break;
686  }
687  }
688  }
689 
690  // We may run into multiple chains that merge into a single chain. We mark the
691  // stores that we transformed so that we don't visit the same store twice.
692  SmallPtrSet<Value *, 16> TransformedStores;
693  bool Changed = false;
694 
695  // For stores that start but don't end a link in the chain:
696  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
697  it != e; ++it) {
698  if (Tails.count(*it))
699  continue;
700 
701  // We found a store instr that starts a chain. Now follow the chain and try
702  // to transform it.
703  SmallPtrSet<Instruction *, 8> AdjacentStores;
704  StoreInst *I = *it;
705 
706  StoreInst *HeadStore = I;
707  unsigned StoreSize = 0;
708 
709  // Collect the chain into a list.
710  while (Tails.count(I) || Heads.count(I)) {
711  if (TransformedStores.count(I))
712  break;
713  AdjacentStores.insert(I);
714 
715  StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
716  // Move to the next value in the chain.
717  I = ConsecutiveChain[I];
718  }
719 
720  Value *StoredVal = HeadStore->getValueOperand();
721  Value *StorePtr = HeadStore->getPointerOperand();
722  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
723  APInt Stride = getStoreStride(StoreEv);
724 
725  // Check to see if the stride matches the size of the stores. If so, then
726  // we know that every byte is touched in the loop.
727  if (StoreSize != Stride && StoreSize != -Stride)
728  continue;
729 
730  bool NegStride = StoreSize == -Stride;
731 
732  if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
733  StoredVal, HeadStore, AdjacentStores, StoreEv,
734  BECount, NegStride)) {
735  TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
736  Changed = true;
737  }
738  }
739 
740  return Changed;
741 }
742 
743 /// processLoopMemSet - See if this memset can be promoted to a large memset.
744 bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
745  const SCEV *BECount) {
746  // We can only handle non-volatile memsets with a constant size.
747  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
748  return false;
749 
750  // If we're not allowed to hack on memset, we fail.
751  if (!HasMemset)
752  return false;
753 
754  Value *Pointer = MSI->getDest();
755 
756  // See if the pointer expression is an AddRec like {base,+,1} on the current
757  // loop, which indicates a strided store. If we have something else, it's a
758  // random store we can't handle.
759  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
760  if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
761  return false;
762 
763  // Reject memsets that are so large that they overflow an unsigned.
764  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
765  if ((SizeInBytes >> 32) != 0)
766  return false;
767 
768  // Check to see if the stride matches the size of the memset. If so, then we
769  // know that every byte is touched in the loop.
770  const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
771  if (!ConstStride)
772  return false;
773 
774  APInt Stride = ConstStride->getAPInt();
775  if (SizeInBytes != Stride && SizeInBytes != -Stride)
776  return false;
777 
778  // Verify that the memset value is loop invariant. If not, we can't promote
779  // the memset.
780  Value *SplatValue = MSI->getValue();
781  if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
782  return false;
783 
785  MSIs.insert(MSI);
786  bool NegStride = SizeInBytes == -Stride;
787  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
788  MSI->getDestAlignment(), SplatValue, MSI, MSIs,
789  Ev, BECount, NegStride, /*IsLoopMemset=*/true);
790 }
791 
792 /// mayLoopAccessLocation - Return true if the specified loop might access the
793 /// specified pointer location, which is a loop-strided access. The 'Access'
794 /// argument specifies what the verboten forms of access are (read or write).
795 static bool
797  const SCEV *BECount, unsigned StoreSize,
798  AliasAnalysis &AA,
799  SmallPtrSetImpl<Instruction *> &IgnoredStores) {
800  // Get the location that may be stored across the loop. Since the access is
801  // strided positively through memory, we say that the modified location starts
802  // at the pointer and has infinite size.
803  LocationSize AccessSize = LocationSize::unknown();
804 
805  // If the loop iterates a fixed number of times, we can refine the access size
806  // to be exactly the size of the memset, which is (BECount+1)*StoreSize
807  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
808  AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
809  StoreSize);
810 
811  // TODO: For this to be really effective, we have to dive into the pointer
812  // operand in the store. Store to &A[i] of 100 will always return may alias
813  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
814  // which will then no-alias a store to &A[100].
815  MemoryLocation StoreLoc(Ptr, AccessSize);
816 
817  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
818  ++BI)
819  for (Instruction &I : **BI)
820  if (IgnoredStores.count(&I) == 0 &&
822  intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
823  return true;
824 
825  return false;
826 }
827 
828 // If we have a negative stride, Start refers to the end of the memory location
829 // we're trying to memset. Therefore, we need to recompute the base pointer,
830 // which is just Start - BECount*Size.
831 static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
832  Type *IntPtr, unsigned StoreSize,
833  ScalarEvolution *SE) {
834  const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
835  if (StoreSize != 1)
836  Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
837  SCEV::FlagNUW);
838  return SE->getMinusSCEV(Start, Index);
839 }
840 
841 /// Compute the number of bytes as a SCEV from the backedge taken count.
842 ///
843 /// This also maps the SCEV into the provided type and tries to handle the
844 /// computation in a way that will fold cleanly.
845 static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
846  unsigned StoreSize, Loop *CurLoop,
847  const DataLayout *DL, ScalarEvolution *SE) {
848  const SCEV *NumBytesS;
849  // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
850  // pointer size if it isn't already.
851  //
852  // If we're going to need to zero extend the BE count, check if we can add
853  // one to it prior to zero extending without overflow. Provided this is safe,
854  // it allows better simplification of the +1.
855  if (DL->getTypeSizeInBits(BECount->getType()) <
856  DL->getTypeSizeInBits(IntPtr) &&
858  CurLoop, ICmpInst::ICMP_NE, BECount,
859  SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
860  NumBytesS = SE->getZeroExtendExpr(
861  SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
862  IntPtr);
863  } else {
864  NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
865  SE->getOne(IntPtr), SCEV::FlagNUW);
866  }
867 
868  // And scale it based on the store size.
869  if (StoreSize != 1) {
870  NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
871  SCEV::FlagNUW);
872  }
873  return NumBytesS;
874 }
875 
876 /// processLoopStridedStore - We see a strided store of some value. If we can
877 /// transform this into a memset or memset_pattern in the loop preheader, do so.
878 bool LoopIdiomRecognize::processLoopStridedStore(
879  Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
880  Value *StoredVal, Instruction *TheStore,
882  const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
883  Value *SplatValue = isBytewiseValue(StoredVal, *DL);
884  Constant *PatternValue = nullptr;
885 
886  if (!SplatValue)
887  PatternValue = getMemSetPatternValue(StoredVal, DL);
888 
889  assert((SplatValue || PatternValue) &&
890  "Expected either splat value or pattern value.");
891 
892  // The trip count of the loop and the base pointer of the addrec SCEV is
893  // guaranteed to be loop invariant, which means that it should dominate the
894  // header. This allows us to insert code for it in the preheader.
895  unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
896  BasicBlock *Preheader = CurLoop->getLoopPreheader();
897  IRBuilder<> Builder(Preheader->getTerminator());
898  SCEVExpander Expander(*SE, *DL, "loop-idiom");
899 
900  Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
901  Type *IntPtr = Builder.getIntPtrTy(*DL, DestAS);
902 
903  const SCEV *Start = Ev->getStart();
904  // Handle negative strided loops.
905  if (NegStride)
906  Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE);
907 
908  // TODO: ideally we should still be able to generate memset if SCEV expander
909  // is taught to generate the dependencies at the latest point.
910  if (!isSafeToExpand(Start, *SE))
911  return false;
912 
913  // Okay, we have a strided store "p[i]" of a splattable value. We can turn
914  // this into a memset in the loop preheader now if we want. However, this
915  // would be unsafe to do if there is anything else in the loop that may read
916  // or write to the aliased location. Check for any overlap by generating the
917  // base pointer and checking the region.
918  Value *BasePtr =
919  Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
920  if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
921  StoreSize, *AA, Stores)) {
922  Expander.clear();
923  // If we generated new code for the base pointer, clean up.
925  return false;
926  }
927 
928  if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
929  return false;
930 
931  // Okay, everything looks good, insert the memset.
932 
933  const SCEV *NumBytesS =
934  getNumBytes(BECount, IntPtr, StoreSize, CurLoop, DL, SE);
935 
936  // TODO: ideally we should still be able to generate memset if SCEV expander
937  // is taught to generate the dependencies at the latest point.
938  if (!isSafeToExpand(NumBytesS, *SE))
939  return false;
940 
941  Value *NumBytes =
942  Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
943 
944  CallInst *NewCall;
945  if (SplatValue) {
946  NewCall =
947  Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
948  } else {
949  // Everything is emitted in default address space
950  Type *Int8PtrTy = DestInt8PtrTy;
951 
952  Module *M = TheStore->getModule();
953  StringRef FuncName = "memset_pattern16";
954  FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
955  Int8PtrTy, Int8PtrTy, IntPtr);
956  inferLibFuncAttributes(M, FuncName, *TLI);
957 
958  // Otherwise we should form a memset_pattern16. PatternValue is known to be
959  // an constant array of 16-bytes. Plop the value into a mergable global.
960  GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
962  PatternValue, ".memset_pattern");
963  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
964  GV->setAlignment(16);
965  Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
966  NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
967  }
968 
969  LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
970  << " from store to: " << *Ev << " at: " << *TheStore
971  << "\n");
972  NewCall->setDebugLoc(TheStore->getDebugLoc());
973 
974  ORE.emit([&]() {
975  return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
976  NewCall->getDebugLoc(), Preheader)
977  << "Transformed loop-strided store into a call to "
978  << ore::NV("NewFunction", NewCall->getCalledFunction())
979  << "() function";
980  });
981 
982  // Okay, the memset has been formed. Zap the original store and anything that
983  // feeds into it.
984  for (auto *I : Stores)
986  ++NumMemSet;
987  return true;
988 }
989 
990 /// If the stored value is a strided load in the same loop with the same stride
991 /// this may be transformable into a memcpy. This kicks in for stuff like
992 /// for (i) A[i] = B[i];
993 bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
994  const SCEV *BECount) {
995  assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
996 
997  Value *StorePtr = SI->getPointerOperand();
998  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
999  APInt Stride = getStoreStride(StoreEv);
1000  unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1001  bool NegStride = StoreSize == -Stride;
1002 
1003  // The store must be feeding a non-volatile load.
1004  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
1005  assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
1006 
1007  // See if the pointer expression is an AddRec like {base,+,1} on the current
1008  // loop, which indicates a strided load. If we have something else, it's a
1009  // random load we can't handle.
1010  const SCEVAddRecExpr *LoadEv =
1011  cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
1012 
1013  // The trip count of the loop and the base pointer of the addrec SCEV is
1014  // guaranteed to be loop invariant, which means that it should dominate the
1015  // header. This allows us to insert code for it in the preheader.
1016  BasicBlock *Preheader = CurLoop->getLoopPreheader();
1017  IRBuilder<> Builder(Preheader->getTerminator());
1018  SCEVExpander Expander(*SE, *DL, "loop-idiom");
1019 
1020  const SCEV *StrStart = StoreEv->getStart();
1021  unsigned StrAS = SI->getPointerAddressSpace();
1022  Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
1023 
1024  // Handle negative strided loops.
1025  if (NegStride)
1026  StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
1027 
1028  // Okay, we have a strided store "p[i]" of a loaded value. We can turn
1029  // this into a memcpy in the loop preheader now if we want. However, this
1030  // would be unsafe to do if there is anything else in the loop that may read
1031  // or write the memory region we're storing to. This includes the load that
1032  // feeds the stores. Check for an alias by generating the base address and
1033  // checking everything.
1034  Value *StoreBasePtr = Expander.expandCodeFor(
1035  StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
1036 
1038  Stores.insert(SI);
1039  if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
1040  StoreSize, *AA, Stores)) {
1041  Expander.clear();
1042  // If we generated new code for the base pointer, clean up.
1043  RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
1044  return false;
1045  }
1046 
1047  const SCEV *LdStart = LoadEv->getStart();
1048  unsigned LdAS = LI->getPointerAddressSpace();
1049 
1050  // Handle negative strided loops.
1051  if (NegStride)
1052  LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
1053 
1054  // For a memcpy, we have to make sure that the input array is not being
1055  // mutated by the loop.
1056  Value *LoadBasePtr = Expander.expandCodeFor(
1057  LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
1058 
1059  if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
1060  StoreSize, *AA, Stores)) {
1061  Expander.clear();
1062  // If we generated new code for the base pointer, clean up.
1064  RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
1065  return false;
1066  }
1067 
1068  if (avoidLIRForMultiBlockLoop())
1069  return false;
1070 
1071  // Okay, everything is safe, we can transform this!
1072 
1073  const SCEV *NumBytesS =
1074  getNumBytes(BECount, IntPtrTy, StoreSize, CurLoop, DL, SE);
1075 
1076  Value *NumBytes =
1077  Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
1078 
1079  CallInst *NewCall = nullptr;
1080  // Check whether to generate an unordered atomic memcpy:
1081  // If the load or store are atomic, then they must necessarily be unordered
1082  // by previous checks.
1083  if (!SI->isAtomic() && !LI->isAtomic())
1084  NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
1085  LoadBasePtr, LI->getAlignment(), NumBytes);
1086  else {
1087  // We cannot allow unaligned ops for unordered load/store, so reject
1088  // anything where the alignment isn't at least the element size.
1089  unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
1090  if (Align < StoreSize)
1091  return false;
1092 
1093  // If the element.atomic memcpy is not lowered into explicit
1094  // loads/stores later, then it will be lowered into an element-size
1095  // specific lib call. If the lib call doesn't exist for our store size, then
1096  // we shouldn't generate the memcpy.
1097  if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
1098  return false;
1099 
1100  // Create the call.
1101  // Note that unordered atomic loads/stores are *required* by the spec to
1102  // have an alignment but non-atomic loads/stores may not.
1103  NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1104  StoreBasePtr, SI->getAlignment(), LoadBasePtr, LI->getAlignment(),
1105  NumBytes, StoreSize);
1106  }
1107  NewCall->setDebugLoc(SI->getDebugLoc());
1108 
1109  LLVM_DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
1110  << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
1111  << " from store ptr=" << *StoreEv << " at: " << *SI
1112  << "\n");
1113 
1114  ORE.emit([&]() {
1115  return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
1116  NewCall->getDebugLoc(), Preheader)
1117  << "Formed a call to "
1118  << ore::NV("NewFunction", NewCall->getCalledFunction())
1119  << "() function";
1120  });
1121 
1122  // Okay, the memcpy has been formed. Zap the original store and anything that
1123  // feeds into it.
1125  ++NumMemCpy;
1126  return true;
1127 }
1128 
1129 // When compiling for codesize we avoid idiom recognition for a multi-block loop
1130 // unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
1131 //
1132 bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
1133  bool IsLoopMemset) {
1134  if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
1135  if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) {
1136  LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
1137  << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
1138  << " avoided: multi-block top-level loop\n");
1139  return true;
1140  }
1141  }
1142 
1143  return false;
1144 }
1145 
1146 bool LoopIdiomRecognize::runOnNoncountableLoop() {
1147  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
1148  << CurLoop->getHeader()->getParent()->getName()
1149  << "] Noncountable Loop %"
1150  << CurLoop->getHeader()->getName() << "\n");
1151 
1152  return recognizePopcount() || recognizeAndInsertFFS();
1153 }
1154 
1155 /// Check if the given conditional branch is based on the comparison between
1156 /// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
1157 /// true), the control yields to the loop entry. If the branch matches the
1158 /// behavior, the variable involved in the comparison is returned. This function
1159 /// will be called to see if the precondition and postcondition of the loop are
1160 /// in desirable form.
1161 static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
1162  bool JmpOnZero = false) {
1163  if (!BI || !BI->isConditional())
1164  return nullptr;
1165 
1166  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1167  if (!Cond)
1168  return nullptr;
1169 
1170  ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1171  if (!CmpZero || !CmpZero->isZero())
1172  return nullptr;
1173 
1174  BasicBlock *TrueSucc = BI->getSuccessor(0);
1175  BasicBlock *FalseSucc = BI->getSuccessor(1);
1176  if (JmpOnZero)
1177  std::swap(TrueSucc, FalseSucc);
1178 
1179  ICmpInst::Predicate Pred = Cond->getPredicate();
1180  if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
1181  (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
1182  return Cond->getOperand(0);
1183 
1184  return nullptr;
1185 }
1186 
1187 // Check if the recurrence variable `VarX` is in the right form to create
1188 // the idiom. Returns the value coerced to a PHINode if so.
1190  BasicBlock *LoopEntry) {
1191  auto *PhiX = dyn_cast<PHINode>(VarX);
1192  if (PhiX && PhiX->getParent() == LoopEntry &&
1193  (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
1194  return PhiX;
1195  return nullptr;
1196 }
1197 
/// Return true iff the idiom is detected in the loop.
///
/// Additionally:
/// 1) \p CntInst is set to the instruction counting the population bit.
/// 2) \p CntPhi is set to the corresponding phi node.
/// 3) \p Var is set to the value whose population bits are being counted.
///
/// The core idiom we are trying to detect is:
/// \code
///    if (x0 != 0)
///      goto loop-exit // the precondition of the loop
///    cnt0 = init-val;
///    do {
///       x1 = phi (x0, x2);
///       cnt1 = phi(cnt0, cnt2);
///
///       cnt2 = cnt1 + 1;
///        ...
///       x2 = x1 & (x1 - 1);
///        ...
///    } while(x != 0);
///
/// loop-exit:
/// \endcode
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
                                Instruction *&CntInst, PHINode *&CntPhi,
                                Value *&Var) {
  // step 1: Check to see if the look-back branch match this pattern:
  //    "if (a!=0) goto loop-entry".
  BasicBlock *LoopEntry;
  Instruction *DefX2, *CountInst;
  Value *VarX1, *VarX0;
  PHINode *PhiX, *CountPhi;

  DefX2 = CountInst = nullptr;
  VarX1 = VarX0 = nullptr;
  PhiX = CountPhi = nullptr;
  // The caller guarantees a single-block loop, so the only block is both
  // header and latch.
  LoopEntry = *(CurLoop->block_begin());

  // step 1: Check if the loop-back branch is in desirable form.
  {
    // The latch must branch back on "x2 != 0"; the compared value is the
    // candidate for the "x2 = x1 & (x1 - 1)" definition.
    if (Value *T = matchCondition(
            dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
      DefX2 = dyn_cast<Instruction>(T);
    else
      return false;
  }

  // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
  {
    if (!DefX2 || DefX2->getOpcode() != Instruction::And)
      return false;

    BinaryOperator *SubOneOp;

    // The "x1 - 1" half may be either operand of the 'and'; the other
    // operand is x1 itself.
    if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
      VarX1 = DefX2->getOperand(1);
    else {
      VarX1 = DefX2->getOperand(0);
      SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
    }
    if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
      return false;

    // Accept both "x1 - 1" and the equivalent "x1 + (-1)".
    ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
    if (!Dec ||
        !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
          (SubOneOp->getOpcode() == Instruction::Add &&
           Dec->isMinusOne()))) {
      return false;
    }
  }

  // step 3: Check the recurrence of variable X
  PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
  if (!PhiX)
    return false;

  // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
  {
    CountInst = nullptr;
    // Scan every non-PHI instruction of the body for an "add ..., 1" whose
    // LHS recurs through a PHI — that is the population counter.
    for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
                              IterE = LoopEntry->end();
         Iter != IterE; Iter++) {
      Instruction *Inst = &*Iter;
      if (Inst->getOpcode() != Instruction::Add)
        continue;

      ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
      if (!Inc || !Inc->isOne())
        continue;

      PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
      if (!Phi)
        continue;

      // Check if the result of the instruction is live of the loop.
      bool LiveOutLoop = false;
      for (User *U : Inst->users()) {
        if ((cast<Instruction>(U))->getParent() != LoopEntry) {
          LiveOutLoop = true;
          break;
        }
      }

      if (LiveOutLoop) {
        CountInst = Inst;
        CountPhi = Phi;
        break;
      }
    }

    if (!CountInst)
      return false;
  }

  // step 5: check if the precondition is in this form:
  //   "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
  {
    // The variable tested by the precondition must be one of the incoming
    // values of the x-recurrence PHI, i.e. the initial value x0.
    auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
    Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
    if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
      return false;

    CntInst = CountInst;
    CntPhi = CountPhi;
    Var = T;
  }

  return true;
}
1329 
/// Return true if the idiom is detected in the loop.
///
/// Additionally:
/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
///       or nullptr if there is no such.
/// 2) \p CntPhi is set to the corresponding phi node
///       or nullptr if there is no such.
/// 3) \p Var is set to the value whose CTLZ could be used.
/// 4) \p DefX is set to the instruction calculating Loop exit condition.
///
/// The core idiom we are trying to detect is:
/// \code
///    if (x0 == 0)
///      goto loop-exit // the precondition of the loop
///    cnt0 = init-val;
///    do {
///       x = phi (x0, x.next);   //PhiX
///       cnt = phi(cnt0, cnt.next);
///
///       cnt.next = cnt + 1;
///        ...
///       x.next = x >> 1;   // DefX
///        ...
///    } while(x.next != 0);
///
/// loop-exit:
/// \endcode
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
                                      Intrinsic::ID &IntrinID, Value *&InitX,
                                      Instruction *&CntInst, PHINode *&CntPhi,
                                      Instruction *&DefX) {
  BasicBlock *LoopEntry;
  Value *VarX = nullptr;

  DefX = nullptr;
  CntInst = nullptr;
  CntPhi = nullptr;
  // The caller guarantees a single-block loop, so the only block is both
  // header and latch.
  LoopEntry = *(CurLoop->block_begin());

  // step 1: Check if the loop-back branch is in desirable form.
  // The latch must branch back on "x.next != 0"; the compared value is the
  // candidate shift instruction.
  if (Value *T = matchCondition(
          dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
    DefX = dyn_cast<Instruction>(T);
  else
    return false;

  // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
  if (!DefX || !DefX->isShift())
    return false;
  // Shifting left empties the value from the low end, so the trip count is
  // the number of trailing zeros (cttz); shifting right implies ctlz.
  IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
                                                     Intrinsic::ctlz;
  // Only a shift amount of exactly one matches the idiom.
  ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
  if (!Shft || !Shft->isOne())
    return false;
  VarX = DefX->getOperand(0);

  // step 3: Check the recurrence of variable X
  PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
  if (!PhiX)
    return false;

  InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());

  // Make sure the initial value can't be negative otherwise the ashr in the
  // loop might never reach zero which would make the loop infinite.
  if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
    return false;

  // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
  // TODO: We can skip the step. If loop trip count is known (CTLZ),
  //       then all uses of "cnt.next" could be optimized to the trip count
  //       plus "cnt0". Currently it is not optimized.
  //       This step could be used to detect POPCNT instruction:
  //       cnt.next = cnt + (x.next & 1)
  for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
                            IterE = LoopEntry->end();
       Iter != IterE; Iter++) {
    Instruction *Inst = &*Iter;
    if (Inst->getOpcode() != Instruction::Add)
      continue;

    ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
    if (!Inc || !Inc->isOne())
      continue;

    // The counter must itself recur through a PHI in the loop body.
    PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
    if (!Phi)
      continue;

    CntInst = Inst;
    CntPhi = Phi;
    break;
  }
  if (!CntInst)
    return false;

  return true;
}
1428 
1429 /// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1430 /// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1431 /// trip count returns true; otherwise, returns false.
1432 bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1433  // Give up if the loop has multiple blocks or multiple backedges.
1434  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1435  return false;
1436 
1437  Intrinsic::ID IntrinID;
1438  Value *InitX;
1439  Instruction *DefX = nullptr;
1440  PHINode *CntPhi = nullptr;
1441  Instruction *CntInst = nullptr;
1442  // Help decide if transformation is profitable. For ShiftUntilZero idiom,
1443  // this is always 6.
1444  size_t IdiomCanonicalSize = 6;
1445 
1446  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
1447  CntInst, CntPhi, DefX))
1448  return false;
1449 
1450  bool IsCntPhiUsedOutsideLoop = false;
1451  for (User *U : CntPhi->users())
1452  if (!CurLoop->contains(cast<Instruction>(U))) {
1453  IsCntPhiUsedOutsideLoop = true;
1454  break;
1455  }
1456  bool IsCntInstUsedOutsideLoop = false;
1457  for (User *U : CntInst->users())
1458  if (!CurLoop->contains(cast<Instruction>(U))) {
1459  IsCntInstUsedOutsideLoop = true;
1460  break;
1461  }
1462  // If both CntInst and CntPhi are used outside the loop the profitability
1463  // is questionable.
1464  if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1465  return false;
1466 
1467  // For some CPUs result of CTLZ(X) intrinsic is undefined
1468  // when X is 0. If we can not guarantee X != 0, we need to check this
1469  // when expand.
1470  bool ZeroCheck = false;
1471  // It is safe to assume Preheader exist as it was checked in
1472  // parent function RunOnLoop.
1473  BasicBlock *PH = CurLoop->getLoopPreheader();
1474 
1475  // If we are using the count instruction outside the loop, make sure we
1476  // have a zero check as a precondition. Without the check the loop would run
1477  // one iteration for before any check of the input value. This means 0 and 1
1478  // would have identical behavior in the original loop and thus
1479  if (!IsCntPhiUsedOutsideLoop) {
1480  auto *PreCondBB = PH->getSinglePredecessor();
1481  if (!PreCondBB)
1482  return false;
1483  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1484  if (!PreCondBI)
1485  return false;
1486  if (matchCondition(PreCondBI, PH) != InitX)
1487  return false;
1488  ZeroCheck = true;
1489  }
1490 
1491  // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1492  // profitable if we delete the loop.
1493 
1494  // the loop has only 6 instructions:
1495  // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1496  // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1497  // %shr = ashr %n.addr.0, 1
1498  // %tobool = icmp eq %shr, 0
1499  // %inc = add nsw %i.0, 1
1500  // br i1 %tobool
1501 
1502  const Value *Args[] =
1503  {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
1504  : ConstantInt::getFalse(InitX->getContext())};
1505 
1506  // @llvm.dbg doesn't count as they have no semantic effect.
1507  auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
1509  std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1510 
1511  if (HeaderSize != IdiomCanonicalSize &&
1512  TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
1514  return false;
1515 
1516  transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1517  DefX->getDebugLoc(), ZeroCheck,
1518  IsCntPhiUsedOutsideLoop);
1519  return true;
1520 }
1521 
1522 /// Recognizes a population count idiom in a non-countable loop.
1523 ///
1524 /// If detected, transforms the relevant code to issue the popcount intrinsic
1525 /// function call, and returns true; otherwise, returns false.
1526 bool LoopIdiomRecognize::recognizePopcount() {
1528  return false;
1529 
1530  // Counting population are usually conducted by few arithmetic instructions.
1531  // Such instructions can be easily "absorbed" by vacant slots in a
1532  // non-compact loop. Therefore, recognizing popcount idiom only makes sense
1533  // in a compact loop.
1534 
1535  // Give up if the loop has multiple blocks or multiple backedges.
1536  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1537  return false;
1538 
1539  BasicBlock *LoopBody = *(CurLoop->block_begin());
1540  if (LoopBody->size() >= 20) {
1541  // The loop is too big, bail out.
1542  return false;
1543  }
1544 
1545  // It should have a preheader containing nothing but an unconditional branch.
1546  BasicBlock *PH = CurLoop->getLoopPreheader();
1547  if (!PH || &PH->front() != PH->getTerminator())
1548  return false;
1549  auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
1550  if (!EntryBI || EntryBI->isConditional())
1551  return false;
1552 
1553  // It should have a precondition block where the generated popcount intrinsic
1554  // function can be inserted.
1555  auto *PreCondBB = PH->getSinglePredecessor();
1556  if (!PreCondBB)
1557  return false;
1558  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1559  if (!PreCondBI || PreCondBI->isUnconditional())
1560  return false;
1561 
1562  Instruction *CntInst;
1563  PHINode *CntPhi;
1564  Value *Val;
1565  if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
1566  return false;
1567 
1568  transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
1569  return true;
1570 }
1571 
1573  const DebugLoc &DL) {
1574  Value *Ops[] = {Val};
1575  Type *Tys[] = {Val->getType()};
1576 
1577  Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1578  Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
1579  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1580  CI->setDebugLoc(DL);
1581 
1582  return CI;
1583 }
1584 
1586  const DebugLoc &DL, bool ZeroCheck,
1587  Intrinsic::ID IID) {
1588  Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
1589  Type *Tys[] = {Val->getType()};
1590 
1591  Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1592  Function *Func = Intrinsic::getDeclaration(M, IID, Tys);
1593  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1594  CI->setDebugLoc(DL);
1595 
1596  return CI;
1597 }
1598 
/// Transform the following loop (Using CTLZ, CTTZ is similar):
/// loop:
///   CntPhi = PHI [Cnt0, CntInst]
///   PhiX = PHI [InitX, DefX]
///   CntInst = CntPhi + 1
///   DefX = PhiX >> 1
///   LOOP_BODY
///   Br: loop if (DefX != 0)
/// Use(CntPhi) or Use(CntInst)
///
/// Into:
/// If CntPhi used outside the loop:
///   CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
///   Count = CountPrev + 1
/// else
///   Count = BitWidth(InitX) - CTLZ(InitX)
/// loop:
///   CntPhi = PHI [Cnt0, CntInst]
///   PhiX = PHI [InitX, DefX]
///   PhiCount = PHI [Count, Dec]
///   CntInst = CntPhi + 1
///   DefX = PhiX >> 1
///   Dec = PhiCount - 1
///   LOOP_BODY
///   Br: loop if (Dec != 0)
/// Use(CountPrev + Cnt0) // Use(CntPhi)
/// or
/// Use(Count + Cnt0) // Use(CntInst)
///
/// If LOOP_BODY is empty the loop will be deleted.
/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
void LoopIdiomRecognize::transformLoopToCountable(
    Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
    PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
    bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
  BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());

  // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
  IRBuilder<> Builder(PreheaderBr);
  Builder.SetCurrentDebugLocation(DL);
  Value *FFS, *Count, *CountPrev, *NewCount, *InitXNext;

  //   Count = BitWidth - CTLZ(InitX);
  // If there are uses of CntPhi create:
  //   CountPrev = BitWidth - CTLZ(InitX >> 1);
  if (IsCntPhiUsedOutsideLoop) {
    // Pre-shift InitX by one so the count matches CntPhi, which lags the
    // increment by one iteration.  The shift direction mirrors DefX.
    if (DefX->getOpcode() == Instruction::AShr)
      InitXNext =
          Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(), 1));
    else if (DefX->getOpcode() == Instruction::LShr)
      InitXNext =
          Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
    else if (DefX->getOpcode() == Instruction::Shl) // cttz
      InitXNext =
          Builder.CreateShl(InitX, ConstantInt::get(InitX->getType(), 1));
    else
      llvm_unreachable("Unexpected opcode!");
  } else
    InitXNext = InitX;
  FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
  // Count = BitWidth - CTLZ(InitXNext): the number of iterations the loop
  // executes.
  Count = Builder.CreateSub(
      ConstantInt::get(FFS->getType(),
                       FFS->getType()->getIntegerBitWidth()),
      FFS);
  if (IsCntPhiUsedOutsideLoop) {
    CountPrev = Count;
    Count = Builder.CreateAdd(
        CountPrev,
        ConstantInt::get(CountPrev->getType(), 1));
  }

  // NewCount is the value that replaces out-of-loop uses of the original
  // counter; it is resized to the counter's integer type.
  NewCount = Builder.CreateZExtOrTrunc(
                      IsCntPhiUsedOutsideLoop ? CountPrev : Count,
                      cast<IntegerType>(CntInst->getType()));

  // If the counter's initial value is not zero, insert Add Inst.
  Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
  ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
  if (!InitConst || !InitConst->isZero())
    NewCount = Builder.CreateAdd(NewCount, CntInitVal);

  // Step 2: Insert new IV and loop condition:
  // loop:
  //   ...
  //   PhiCount = PHI [Count, Dec]
  //   ...
  //   Dec = PhiCount - 1
  //   ...
  //   Br: loop if (Dec != 0)
  BasicBlock *Body = *(CurLoop->block_begin());
  auto *LbBr = cast<BranchInst>(Body->getTerminator());
  ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
  Type *Ty = Count->getType();

  PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());

  Builder.SetInsertPoint(LbCond);
  Instruction *TcDec = cast<Instruction>(
      Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
                        "tcdec", false, true));

  TcPhi->addIncoming(Count, Preheader);
  TcPhi->addIncoming(TcDec, Body);

  // Rewrite the latch condition to test the new down-counter instead of the
  // shifted value, preserving which successor is the loop body.
  CmpInst::Predicate Pred =
      (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
  LbCond->setPredicate(Pred);
  LbCond->setOperand(0, TcDec);
  LbCond->setOperand(1, ConstantInt::get(Ty, 0));

  // Step 3: All the references to the original counter outside
  //  the loop are replaced with the NewCount
  if (IsCntPhiUsedOutsideLoop)
    CntPhi->replaceUsesOutsideBlock(NewCount, Body);
  else
    CntInst->replaceUsesOutsideBlock(NewCount, Body);

  // step 4: Forget the "non-computable" trip-count SCEV associated with the
  //   loop. The loop would otherwise not be deleted even if it becomes empty.
  SE->forgetLoop(CurLoop);
}
1720 
1721 void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
1722  Instruction *CntInst,
1723  PHINode *CntPhi, Value *Var) {
1724  BasicBlock *PreHead = CurLoop->getLoopPreheader();
1725  auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
1726  const DebugLoc &DL = CntInst->getDebugLoc();
1727 
1728  // Assuming before transformation, the loop is following:
1729  // if (x) // the precondition
1730  // do { cnt++; x &= x - 1; } while(x);
1731 
1732  // Step 1: Insert the ctpop instruction at the end of the precondition block
1733  IRBuilder<> Builder(PreCondBr);
1734  Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
1735  {
1736  PopCnt = createPopcntIntrinsic(Builder, Var, DL);
1737  NewCount = PopCntZext =
1738  Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
1739 
1740  if (NewCount != PopCnt)
1741  (cast<Instruction>(NewCount))->setDebugLoc(DL);
1742 
1743  // TripCnt is exactly the number of iterations the loop has
1744  TripCnt = NewCount;
1745 
1746  // If the population counter's initial value is not zero, insert Add Inst.
1747  Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
1748  ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1749  if (!InitConst || !InitConst->isZero()) {
1750  NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1751  (cast<Instruction>(NewCount))->setDebugLoc(DL);
1752  }
1753  }
1754 
1755  // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
1756  // "if (NewCount == 0) loop-exit". Without this change, the intrinsic
1757  // function would be partial dead code, and downstream passes will drag
1758  // it back from the precondition block to the preheader.
1759  {
1760  ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
1761 
1762  Value *Opnd0 = PopCntZext;
1763  Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
1764  if (PreCond->getOperand(0) != Var)
1765  std::swap(Opnd0, Opnd1);
1766 
1767  ICmpInst *NewPreCond = cast<ICmpInst>(
1768  Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
1769  PreCondBr->setCondition(NewPreCond);
1770 
1772  }
1773 
1774  // Step 3: Note that the population count is exactly the trip count of the
1775  // loop in question, which enable us to convert the loop from noncountable
1776  // loop into a countable one. The benefit is twofold:
1777  //
1778  // - If the loop only counts population, the entire loop becomes dead after
1779  // the transformation. It is a lot easier to prove a countable loop dead
1780  // than to prove a noncountable one. (In some C dialects, an infinite loop
1781  // isn't dead even if it computes nothing useful. In general, DCE needs
1782  // to prove a noncountable loop finite before safely delete it.)
1783  //
1784  // - If the loop also performs something else, it remains alive.
1785  // Since it is transformed to countable form, it can be aggressively
1786  // optimized by some optimizations which are in general not applicable
1787  // to a noncountable loop.
1788  //
1789  // After this step, this loop (conceptually) would look like following:
1790  // newcnt = __builtin_ctpop(x);
1791  // t = newcnt;
1792  // if (x)
1793  // do { cnt++; x &= x-1; t--) } while (t > 0);
1794  BasicBlock *Body = *(CurLoop->block_begin());
1795  {
1796  auto *LbBr = cast<BranchInst>(Body->getTerminator());
1797  ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1798  Type *Ty = TripCnt->getType();
1799 
1800  PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1801 
1802  Builder.SetInsertPoint(LbCond);
1803  Instruction *TcDec = cast<Instruction>(
1804  Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1805  "tcdec", false, true));
1806 
1807  TcPhi->addIncoming(TripCnt, PreHead);
1808  TcPhi->addIncoming(TcDec, Body);
1809 
1810  CmpInst::Predicate Pred =
1811  (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
1812  LbCond->setPredicate(Pred);
1813  LbCond->setOperand(0, TcDec);
1814  LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1815  }
1816 
1817  // Step 4: All the references to the original population counter outside
1818  // the loop are replaced with the NewCount -- the value returned from
1819  // __builtin_ctpop().
1820  CntInst->replaceUsesOutsideBlock(NewCount, Body);
1821 
1822  // step 5: Forget the "non-computable" trip-count SCEV associated with the
1823  // loop. The loop would otherwise not be deleted even if it becomes empty.
1824  SE->forgetLoop(CurLoop);
1825 }
The access may reference and may modify the value stored in memory.
virtual void computeLoopSafetyInfo(const Loop *CurLoop)
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:44
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
uint64_t CallInst * C
Value * getValueOperand()
Definition: Instructions.h:409
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:606
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2198
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static CallInst * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:1888
const SCEV * getConstant(ConstantInt *V)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
BinaryOps getOpcode() const
Definition: InstrTypes.h:402
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
ArrayRef< BasicBlock *>::const_iterator block_iterator
Definition: LoopInfo.h:155
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:622
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static constexpr LocationSize unknown()
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
bool isUnordered() const
Definition: Instructions.h:403
void push_back(const T &Elt)
Definition: SmallVector.h:211
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
Value * getValue() const
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:732
static LocationSize precise(uint64_t Value)
This class wraps the llvm.memset intrinsic.
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:580
An instruction for reading from memory.
Definition: Instructions.h:167
static Value * matchCondition(BranchInst *BI, BasicBlock *LoopEntry, bool JmpOnZero=false)
Check if the given conditional branch is based on the comparison between a variable and zero...
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
void setAlignment(unsigned Align)
Definition: Globals.cpp:116
Value * getLength() const
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
static Constant * get(ArrayType *T, ArrayRef< Constant *> V)
Definition: Constants.cpp:1004
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
Definition: MustExecute.h:97
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:133
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:231
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:897
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
This file contains the simple types necessary to represent the attributes associated with functions a...
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1118
unsigned getDestAlignment() const
This file implements a class to represent arbitrary precision integral constant values and operations...
const APInt & getAPInt() const
BlockT * getHeader() const
Definition: LoopInfo.h:102
static bool isSimple(Instruction *I)
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:200
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:418
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:181
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
This node represents a polynomial recurrence on the trip count of the specified loop.
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:126
Class to represent array types.
Definition: DerivedTypes.h:403
static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling" "with -Os/-Oz"), cl::init(true), cl::Hidden)
bool has(LibFunc F) const
Tests whether a library function is available.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:234
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1135
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry &)
An instruction for storing to memory.
Definition: Instructions.h:320
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:208
bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Returns true if the give value is known to be non-negative.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
static void deleteDeadInstruction(Instruction *I)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1043
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:132
Value * getOperand(unsigned i) const
Definition: User.h:169
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS. ...
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1794
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
bool inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI)
Analyze the name and prototype of the given function and set any applicable attributes.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
const SCEV * getOperand(unsigned i) const
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:189
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:318
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:233
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:323
Conditional or Unconditional Branch instruction.
static CallInst * createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck, Intrinsic::ID IID)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:280
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
size_t size() const
Definition: BasicBlock.h:278
Diagnostic information for applied optimization remarks.
bool isUnordered() const
Definition: Instructions.h:278
Represent the analysis usage information of a pass.
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)
Return true if the idiom is detected in the loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
Value * getPointerOperand()
Definition: Instructions.h:284
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
self_iterator getIterator()
Definition: ilist_node.h:81
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:434
Used in the streaming interface as the general argument type.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1436
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
size_t size() const
Definition: SmallVector.h:52
static wasm::ValType getType(const TargetRegisterClass *RC)
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:219
bool isVolatile() const
const SCEV * getMulExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
Representation for a specific memory location.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
typename vector_type::const_iterator iterator
Definition: SetVector.h:48
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:270
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1160
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:643
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:143
static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)
getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset_patte...
static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, unsigned StoreSize, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute the number of bytes as a SCEV from the backedge taken count.
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)
Return true iff the idiom is detected in the loop.
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:599
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
The access may modify the value stored in memory.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
signed less or equal
Definition: InstrTypes.h:762
loop Recognize loop idioms
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:55
Class for arbitrary precision integers.
Definition: APInt.h:69
loop idiom
iterator_range< user_iterator > users()
Definition: Value.h:399
This class uses information about analyze scalars to rewrite expressions in canonical form...
Pass * createLoopIdiomPass()
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> ParamTys, const User *U=nullptr) const
Estimate the cost of an intrinsic when lowered.
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:328
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:601
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:807
bool isVolatile() const
Return true if this is a store to a volatile memory location.
Definition: Instructions.h:353
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:321
INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom", "Recognize loop idioms", false, false) INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:219
unsigned getAtomicMemIntrinsicMaxElementSize() const
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:240
This class represents an analyzed expression in the program.
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:506
This file provides utility analysis objects describing memory locations.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1287
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
LLVM_NODISCARD ModRefInfo intersectModRef(const ModRefInfo MRI1, const ModRefInfo MRI2)
#define I(x, y, z)
Definition: MD5.cpp:58
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:192
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:136
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:582
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
block_iterator block_end() const
Definition: LoopInfo.h:157
uint32_t Size
Definition: Profile.cpp:46
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2223
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:365
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:290
static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, unsigned StoreSize, ScalarEvolution *SE)
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical &#39;add&#39; instruction.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:72
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
A vector that has set insertion semantics.
Definition: SetVector.h:40
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction *> &IgnoredStores)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
unsigned greater than
Definition: InstrTypes.h:755
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
Definition: Value.cpp:439
This pass exposes codegen information to IR-level passes.
LLVM_NODISCARD bool isModOrRefSet(const ModRefInfo MRI)
bool isSimple() const
Definition: Instructions.h:401
This header defines various interfaces for pass management in LLVM.
bool isBigEndian() const
Definition: DataLayout.h:233
#define DEBUG_TYPE
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
virtual bool anyBlockMayThrow() const
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:40
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
block_iterator block_begin() const
Definition: LoopInfo.h:156
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count...
Value * getPointerOperand()
Definition: Instructions.h:412
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
The optimization diagnostic interface.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
This class represents a constant integer value.