LLVM  13.0.0git
LoopIdiomRecognize.cpp
Go to the documentation of this file.
1 //===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements an idiom recognizer that transforms simple loops into a
10 // non-loop form. In cases that this kicks in, it can be a significant
11 // performance win.
12 //
13 // If compiling for code size we avoid idiom recognition if the resulting
14 // code could be larger than the code for the original loop. One way this could
15 // happen is if the loop is not removable after idiom recognition due to the
16 // presence of non-idiom instructions. The initial implementation of the
17 // heuristics applies to idioms in multi-block loops.
18 //
19 //===----------------------------------------------------------------------===//
20 //
21 // TODO List:
22 //
23 // Future loop memory idioms to recognize:
24 // memcmp, memmove, strlen, etc.
25 // Future floating point idioms to recognize in -ffast-math mode:
26 // fpowi
27 // Future integer operation idioms to recognize:
28 // ctpop
29 //
30 // Beware that isel's default lowering for ctpop is highly inefficient for
31 // i64 and larger types when i64 is legal and the value has few bits set. It
32 // would be good to enhance isel to emit a loop for ctpop in this case.
33 //
34 // This could recognize common matrix multiplies and dot product idioms and
35 // replace them with calls to BLAS (if linked in??).
36 //
37 //===----------------------------------------------------------------------===//
38 
40 #include "llvm/ADT/APInt.h"
41 #include "llvm/ADT/ArrayRef.h"
42 #include "llvm/ADT/DenseMap.h"
43 #include "llvm/ADT/MapVector.h"
44 #include "llvm/ADT/SetVector.h"
45 #include "llvm/ADT/SmallPtrSet.h"
46 #include "llvm/ADT/SmallVector.h"
47 #include "llvm/ADT/Statistic.h"
48 #include "llvm/ADT/StringRef.h"
52 #include "llvm/Analysis/LoopInfo.h"
53 #include "llvm/Analysis/LoopPass.h"
64 #include "llvm/IR/Attributes.h"
65 #include "llvm/IR/BasicBlock.h"
66 #include "llvm/IR/Constant.h"
67 #include "llvm/IR/Constants.h"
68 #include "llvm/IR/DataLayout.h"
69 #include "llvm/IR/DebugLoc.h"
70 #include "llvm/IR/DerivedTypes.h"
71 #include "llvm/IR/Dominators.h"
72 #include "llvm/IR/GlobalValue.h"
73 #include "llvm/IR/GlobalVariable.h"
74 #include "llvm/IR/IRBuilder.h"
75 #include "llvm/IR/InstrTypes.h"
76 #include "llvm/IR/Instruction.h"
77 #include "llvm/IR/Instructions.h"
78 #include "llvm/IR/IntrinsicInst.h"
79 #include "llvm/IR/Intrinsics.h"
80 #include "llvm/IR/LLVMContext.h"
81 #include "llvm/IR/Module.h"
82 #include "llvm/IR/PassManager.h"
83 #include "llvm/IR/PatternMatch.h"
84 #include "llvm/IR/Type.h"
85 #include "llvm/IR/User.h"
86 #include "llvm/IR/Value.h"
87 #include "llvm/IR/ValueHandle.h"
88 #include "llvm/InitializePasses.h"
89 #include "llvm/Pass.h"
90 #include "llvm/Support/Casting.h"
92 #include "llvm/Support/Debug.h"
95 #include "llvm/Transforms/Scalar.h"
100 #include <algorithm>
101 #include <cassert>
102 #include <cstdint>
103 #include <utility>
104 #include <vector>
105 
106 using namespace llvm;
107 
108 #define DEBUG_TYPE "loop-idiom"
109 
110 STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
111 STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
112 STATISTIC(
113  NumShiftUntilBitTest,
114  "Number of uncountable loops recognized as 'shift until bitttest' idiom");
115 
116 bool DisableLIRP::All;
117 static cl::opt<bool, true>
118  DisableLIRPAll("disable-" DEBUG_TYPE "-all",
119  cl::desc("Options to disable Loop Idiom Recognize Pass."),
122 
124 static cl::opt<bool, true>
125  DisableLIRPMemset("disable-" DEBUG_TYPE "-memset",
126  cl::desc("Proceed with loop idiom recognize pass, but do "
127  "not convert loop(s) to memset."),
130 
132 static cl::opt<bool, true>
133  DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy",
134  cl::desc("Proceed with loop idiom recognize pass, but do "
135  "not convert loop(s) to memcpy."),
138 
140  "use-lir-code-size-heurs",
141  cl::desc("Use loop idiom recognition code size heuristics when compiling"
142  "with -Os/-Oz"),
143  cl::init(true), cl::Hidden);
144 
145 namespace {
146 
147 class LoopIdiomRecognize {
148  Loop *CurLoop = nullptr;
149  AliasAnalysis *AA;
150  DominatorTree *DT;
151  LoopInfo *LI;
152  ScalarEvolution *SE;
153  TargetLibraryInfo *TLI;
154  const TargetTransformInfo *TTI;
155  const DataLayout *DL;
157  bool ApplyCodeSizeHeuristics;
158  std::unique_ptr<MemorySSAUpdater> MSSAU;
159 
160 public:
161  explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
162  LoopInfo *LI, ScalarEvolution *SE,
163  TargetLibraryInfo *TLI,
164  const TargetTransformInfo *TTI, MemorySSA *MSSA,
165  const DataLayout *DL,
167  : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
168  if (MSSA)
169  MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
170  }
171 
172  bool runOnLoop(Loop *L);
173 
174 private:
175  using StoreList = SmallVector<StoreInst *, 8>;
176  using StoreListMap = MapVector<Value *, StoreList>;
177 
178  StoreListMap StoreRefsForMemset;
179  StoreListMap StoreRefsForMemsetPattern;
180  StoreList StoreRefsForMemcpy;
181  bool HasMemset;
182  bool HasMemsetPattern;
183  bool HasMemcpy;
184 
185  /// Return code for isLegalStore()
186  enum LegalStoreKind {
187  None = 0,
188  Memset,
189  MemsetPattern,
190  Memcpy,
191  UnorderedAtomicMemcpy,
192  DontUse // Dummy retval never to be used. Allows catching errors in retval
193  // handling.
194  };
195 
196  /// \name Countable Loop Idiom Handling
197  /// @{
198 
199  bool runOnCountableLoop();
200  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
201  SmallVectorImpl<BasicBlock *> &ExitBlocks);
202 
203  void collectStores(BasicBlock *BB);
204  LegalStoreKind isLegalStore(StoreInst *SI);
205  enum class ForMemset { No, Yes };
206  bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
207  ForMemset For);
208  bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
209 
210  bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
211  MaybeAlign StoreAlignment, Value *StoredVal,
212  Instruction *TheStore,
214  const SCEVAddRecExpr *Ev, const SCEV *BECount,
215  bool NegStride, bool IsLoopMemset = false);
216  bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
217  bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
218  bool IsLoopMemset = false);
219 
220  /// @}
221  /// \name Noncountable Loop Idiom Handling
222  /// @{
223 
224  bool runOnNoncountableLoop();
225 
226  bool recognizePopcount();
227  void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
228  PHINode *CntPhi, Value *Var);
229  bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
230  void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
231  Instruction *CntInst, PHINode *CntPhi,
232  Value *Var, Instruction *DefX,
233  const DebugLoc &DL, bool ZeroCheck,
234  bool IsCntPhiUsedOutsideLoop);
235 
236  bool recognizeShiftUntilBitTest();
237 
238  /// @}
239 };
240 
241 class LoopIdiomRecognizeLegacyPass : public LoopPass {
242 public:
243  static char ID;
244 
245  explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
248  }
249 
250  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
251  if (DisableLIRP::All)
252  return false;
253 
254  if (skipLoop(L))
255  return false;
256 
257  AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
258  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
259  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
260  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
261  TargetLibraryInfo *TLI =
262  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
263  *L->getHeader()->getParent());
264  const TargetTransformInfo *TTI =
265  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
266  *L->getHeader()->getParent());
267  const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
268  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
269  MemorySSA *MSSA = nullptr;
270  if (MSSAAnalysis)
271  MSSA = &MSSAAnalysis->getMSSA();
272 
273  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
274  // pass. Function analyses need to be preserved across loop transformations
275  // but ORE cannot be preserved (see comment before the pass definition).
277 
278  LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, MSSA, DL, ORE);
279  return LIR.runOnLoop(L);
280  }
281 
282  /// This transformation requires natural loop information & requires that
283  /// loop preheaders be inserted into the CFG.
284  void getAnalysisUsage(AnalysisUsage &AU) const override {
289  }
290 };
291 
292 } // end anonymous namespace
293 
295 
298  LPMUpdater &) {
299  if (DisableLIRP::All)
300  return PreservedAnalyses::all();
301 
302  const auto *DL = &L.getHeader()->getModule()->getDataLayout();
303 
304  // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
305  // pass. Function analyses need to be preserved across loop transformations
306  // but ORE cannot be preserved (see comment before the pass definition).
308 
309  LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
310  AR.MSSA, DL, ORE);
311  if (!LIR.runOnLoop(&L))
312  return PreservedAnalyses::all();
313 
314  auto PA = getLoopPassPreservedAnalyses();
315  if (AR.MSSA)
316  PA.preserve<MemorySSAAnalysis>();
317  return PA;
318 }
319 
320 INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
321  "Recognize loop idioms", false, false)
325 INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",
326  "Recognize loop idioms", false, false)
327 
328 Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
329 
331  I->replaceAllUsesWith(UndefValue::get(I->getType()));
332  I->eraseFromParent();
333 }
334 
335 //===----------------------------------------------------------------------===//
336 //
337 // Implementation of LoopIdiomRecognize
338 //
339 //===----------------------------------------------------------------------===//
340 
341 bool LoopIdiomRecognize::runOnLoop(Loop *L) {
342  CurLoop = L;
343  // If the loop could not be converted to canonical form, it must have an
344  // indirectbr in it, just give up.
345  if (!L->getLoopPreheader())
346  return false;
347 
348  // Disable loop idiom recognition if the function's name is a common idiom.
350  if (Name == "memset" || Name == "memcpy")
351  return false;
352 
353  // Determine if code size heuristics need to be applied.
354  ApplyCodeSizeHeuristics =
356 
357  HasMemset = TLI->has(LibFunc_memset);
358  HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
359  HasMemcpy = TLI->has(LibFunc_memcpy);
360 
361  if (HasMemset || HasMemsetPattern || HasMemcpy)
363  return runOnCountableLoop();
364 
365  return runOnNoncountableLoop();
366 }
367 
368 bool LoopIdiomRecognize::runOnCountableLoop() {
369  const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
370  assert(!isa<SCEVCouldNotCompute>(BECount) &&
371  "runOnCountableLoop() called on a loop without a predictable"
372  "backedge-taken count");
373 
374  // If this loop executes exactly one time, then it should be peeled, not
375  // optimized by this pass.
376  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
377  if (BECst->getAPInt() == 0)
378  return false;
379 
380  SmallVector<BasicBlock *, 8> ExitBlocks;
381  CurLoop->getUniqueExitBlocks(ExitBlocks);
382 
383  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
384  << CurLoop->getHeader()->getParent()->getName()
385  << "] Countable Loop %" << CurLoop->getHeader()->getName()
386  << "\n");
387 
388  // The following transforms hoist stores/memsets into the loop pre-header.
389  // Give up if the loop has instructions that may throw.
390  SimpleLoopSafetyInfo SafetyInfo;
391  SafetyInfo.computeLoopSafetyInfo(CurLoop);
392  if (SafetyInfo.anyBlockMayThrow())
393  return false;
394 
395  bool MadeChange = false;
396 
397  // Scan all the blocks in the loop that are not in subloops.
398  for (auto *BB : CurLoop->getBlocks()) {
399  // Ignore blocks in subloops.
400  if (LI->getLoopFor(BB) != CurLoop)
401  continue;
402 
403  MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
404  }
405  return MadeChange;
406 }
407 
408 static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
409  const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
410  return ConstStride->getAPInt();
411 }
412 
413 /// getMemSetPatternValue - If a strided store of the specified value is safe to
414 /// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
415 /// be passed in. Otherwise, return null.
416 ///
417 /// Note that we don't ever attempt to use memset_pattern8 or 4, because these
418 /// just replicate their input array and then pass on to memset_pattern16.
420  // FIXME: This could check for UndefValue because it can be merged into any
421  // other valid pattern.
422 
423  // If the value isn't a constant, we can't promote it to being in a constant
424  // array. We could theoretically do a store to an alloca or something, but
425  // that doesn't seem worthwhile.
426  Constant *C = dyn_cast<Constant>(V);
427  if (!C)
428  return nullptr;
429 
430  // Only handle simple values that are a power of two bytes in size.
431  uint64_t Size = DL->getTypeSizeInBits(V->getType());
432  if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
433  return nullptr;
434 
435  // Don't care enough about darwin/ppc to implement this.
436  if (DL->isBigEndian())
437  return nullptr;
438 
439  // Convert to size in bytes.
440  Size /= 8;
441 
442  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
443  // if the top and bottom are the same (e.g. for vectors and large integers).
444  if (Size > 16)
445  return nullptr;
446 
447  // If the constant is exactly 16 bytes, just use it.
448  if (Size == 16)
449  return C;
450 
451  // Otherwise, we'll use an array of the constants.
452  unsigned ArraySize = 16 / Size;
453  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
454  return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
455 }
456 
457 LoopIdiomRecognize::LegalStoreKind
458 LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
459  // Don't touch volatile stores.
460  if (SI->isVolatile())
461  return LegalStoreKind::None;
462  // We only want simple or unordered-atomic stores.
463  if (!SI->isUnordered())
464  return LegalStoreKind::None;
465 
466  // Avoid merging nontemporal stores.
467  if (SI->getMetadata(LLVMContext::MD_nontemporal))
468  return LegalStoreKind::None;
469 
470  Value *StoredVal = SI->getValueOperand();
471  Value *StorePtr = SI->getPointerOperand();
472 
473  // Don't convert stores of non-integral pointer types to memsets (which stores
474  // integers).
475  if (DL->isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
476  return LegalStoreKind::None;
477 
478  // Reject stores that are so large that they overflow an unsigned.
479  // When storing out scalable vectors we bail out for now, since the code
480  // below currently only works for constant strides.
481  TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
482  if (SizeInBits.isScalable() || (SizeInBits.getFixedSize() & 7) ||
483  (SizeInBits.getFixedSize() >> 32) != 0)
484  return LegalStoreKind::None;
485 
486  // See if the pointer expression is an AddRec like {base,+,1} on the current
487  // loop, which indicates a strided store. If we have something else, it's a
488  // random store we can't handle.
489  const SCEVAddRecExpr *StoreEv =
490  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
491  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
492  return LegalStoreKind::None;
493 
494  // Check to see if we have a constant stride.
495  if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
496  return LegalStoreKind::None;
497 
498  // See if the store can be turned into a memset.
499 
500  // If the stored value is a byte-wise value (like i32 -1), then it may be
501  // turned into a memset of i8 -1, assuming that all the consecutive bytes
502  // are stored. A store of i32 0x01020304 can never be turned into a memset,
503  // but it can be turned into memset_pattern if the target supports it.
504  Value *SplatValue = isBytewiseValue(StoredVal, *DL);
505  Constant *PatternValue = nullptr;
506 
507  // Note: memset and memset_pattern on unordered-atomic is yet not supported
508  bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
509 
510  // If we're allowed to form a memset, and the stored value would be
511  // acceptable for memset, use it.
512  if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset &&
513  // Verify that the stored value is loop invariant. If not, we can't
514  // promote the memset.
515  CurLoop->isLoopInvariant(SplatValue)) {
516  // It looks like we can use SplatValue.
517  return LegalStoreKind::Memset;
518  } else if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset &&
519  // Don't create memset_pattern16s with address spaces.
520  StorePtr->getType()->getPointerAddressSpace() == 0 &&
521  (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
522  // It looks like we can use PatternValue!
523  return LegalStoreKind::MemsetPattern;
524  }
525 
526  // Otherwise, see if the store can be turned into a memcpy.
527  if (HasMemcpy && !DisableLIRP::Memcpy) {
528  // Check to see if the stride matches the size of the store. If so, then we
529  // know that every byte is touched in the loop.
530  APInt Stride = getStoreStride(StoreEv);
531  unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
532  if (StoreSize != Stride && StoreSize != -Stride)
533  return LegalStoreKind::None;
534 
535  // The store must be feeding a non-volatile load.
536  LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
537 
538  // Only allow non-volatile loads
539  if (!LI || LI->isVolatile())
540  return LegalStoreKind::None;
541  // Only allow simple or unordered-atomic loads
542  if (!LI->isUnordered())
543  return LegalStoreKind::None;
544 
545  // See if the pointer expression is an AddRec like {base,+,1} on the current
546  // loop, which indicates a strided load. If we have something else, it's a
547  // random load we can't handle.
548  const SCEVAddRecExpr *LoadEv =
549  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
550  if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
551  return LegalStoreKind::None;
552 
553  // The store and load must share the same stride.
554  if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
555  return LegalStoreKind::None;
556 
557  // Success. This store can be converted into a memcpy.
558  UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
559  return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
560  : LegalStoreKind::Memcpy;
561  }
562  // This store can't be transformed into a memset/memcpy.
563  return LegalStoreKind::None;
564 }
565 
566 void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
567  StoreRefsForMemset.clear();
568  StoreRefsForMemsetPattern.clear();
569  StoreRefsForMemcpy.clear();
570  for (Instruction &I : *BB) {
571  StoreInst *SI = dyn_cast<StoreInst>(&I);
572  if (!SI)
573  continue;
574 
575  // Make sure this is a strided store with a constant stride.
576  switch (isLegalStore(SI)) {
578  // Nothing to do
579  break;
580  case LegalStoreKind::Memset: {
581  // Find the base pointer.
582  Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
583  StoreRefsForMemset[Ptr].push_back(SI);
584  } break;
585  case LegalStoreKind::MemsetPattern: {
586  // Find the base pointer.
587  Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
588  StoreRefsForMemsetPattern[Ptr].push_back(SI);
589  } break;
590  case LegalStoreKind::Memcpy:
591  case LegalStoreKind::UnorderedAtomicMemcpy:
592  StoreRefsForMemcpy.push_back(SI);
593  break;
594  default:
595  assert(false && "unhandled return value");
596  break;
597  }
598  }
599 }
600 
601 /// runOnLoopBlock - Process the specified block, which lives in a counted loop
602 /// with the specified backedge count. This block is known to be in the current
603 /// loop and not in any subloops.
604 bool LoopIdiomRecognize::runOnLoopBlock(
605  BasicBlock *BB, const SCEV *BECount,
606  SmallVectorImpl<BasicBlock *> &ExitBlocks) {
607  // We can only promote stores in this block if they are unconditionally
608  // executed in the loop. For a block to be unconditionally executed, it has
609  // to dominate all the exit blocks of the loop. Verify this now.
610  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
611  if (!DT->dominates(BB, ExitBlocks[i]))
612  return false;
613 
614  bool MadeChange = false;
615  // Look for store instructions, which may be optimized to memset/memcpy.
616  collectStores(BB);
617 
618  // Look for a single store or sets of stores with a common base, which can be
619  // optimized into a memset (memset_pattern). The latter most commonly happens
620  // with structs and handunrolled loops.
621  for (auto &SL : StoreRefsForMemset)
622  MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
623 
624  for (auto &SL : StoreRefsForMemsetPattern)
625  MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
626 
627  // Optimize the store into a memcpy, if it feeds an similarly strided load.
628  for (auto &SI : StoreRefsForMemcpy)
629  MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
630 
631  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
632  Instruction *Inst = &*I++;
633  // Look for memset instructions, which may be optimized to a larger memset.
634  if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
635  WeakTrackingVH InstPtr(&*I);
636  if (!processLoopMemSet(MSI, BECount))
637  continue;
638  MadeChange = true;
639 
640  // If processing the memset invalidated our iterator, start over from the
641  // top of the block.
642  if (!InstPtr)
643  I = BB->begin();
644  continue;
645  }
646  }
647 
648  return MadeChange;
649 }
650 
651 /// See if this store(s) can be promoted to a memset.
652 bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
653  const SCEV *BECount, ForMemset For) {
654  // Try to find consecutive stores that can be transformed into memsets.
655  SetVector<StoreInst *> Heads, Tails;
657 
658  // Do a quadratic search on all of the given stores and find
659  // all of the pairs of stores that follow each other.
660  SmallVector<unsigned, 16> IndexQueue;
661  for (unsigned i = 0, e = SL.size(); i < e; ++i) {
662  assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
663 
664  Value *FirstStoredVal = SL[i]->getValueOperand();
665  Value *FirstStorePtr = SL[i]->getPointerOperand();
666  const SCEVAddRecExpr *FirstStoreEv =
667  cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
668  APInt FirstStride = getStoreStride(FirstStoreEv);
669  unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());
670 
671  // See if we can optimize just this store in isolation.
672  if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
673  Heads.insert(SL[i]);
674  continue;
675  }
676 
677  Value *FirstSplatValue = nullptr;
678  Constant *FirstPatternValue = nullptr;
679 
680  if (For == ForMemset::Yes)
681  FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
682  else
683  FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
684 
685  assert((FirstSplatValue || FirstPatternValue) &&
686  "Expected either splat value or pattern value.");
687 
688  IndexQueue.clear();
689  // If a store has multiple consecutive store candidates, search Stores
690  // array according to the sequence: from i+1 to e, then from i-1 to 0.
691  // This is because usually pairing with immediate succeeding or preceding
692  // candidate create the best chance to find memset opportunity.
693  unsigned j = 0;
694  for (j = i + 1; j < e; ++j)
695  IndexQueue.push_back(j);
696  for (j = i; j > 0; --j)
697  IndexQueue.push_back(j - 1);
698 
699  for (auto &k : IndexQueue) {
700  assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
701  Value *SecondStorePtr = SL[k]->getPointerOperand();
702  const SCEVAddRecExpr *SecondStoreEv =
703  cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
704  APInt SecondStride = getStoreStride(SecondStoreEv);
705 
706  if (FirstStride != SecondStride)
707  continue;
708 
709  Value *SecondStoredVal = SL[k]->getValueOperand();
710  Value *SecondSplatValue = nullptr;
711  Constant *SecondPatternValue = nullptr;
712 
713  if (For == ForMemset::Yes)
714  SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
715  else
716  SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
717 
718  assert((SecondSplatValue || SecondPatternValue) &&
719  "Expected either splat value or pattern value.");
720 
721  if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
722  if (For == ForMemset::Yes) {
723  if (isa<UndefValue>(FirstSplatValue))
724  FirstSplatValue = SecondSplatValue;
725  if (FirstSplatValue != SecondSplatValue)
726  continue;
727  } else {
728  if (isa<UndefValue>(FirstPatternValue))
729  FirstPatternValue = SecondPatternValue;
730  if (FirstPatternValue != SecondPatternValue)
731  continue;
732  }
733  Tails.insert(SL[k]);
734  Heads.insert(SL[i]);
735  ConsecutiveChain[SL[i]] = SL[k];
736  break;
737  }
738  }
739  }
740 
741  // We may run into multiple chains that merge into a single chain. We mark the
742  // stores that we transformed so that we don't visit the same store twice.
743  SmallPtrSet<Value *, 16> TransformedStores;
744  bool Changed = false;
745 
746  // For stores that start but don't end a link in the chain:
747  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
748  it != e; ++it) {
749  if (Tails.count(*it))
750  continue;
751 
752  // We found a store instr that starts a chain. Now follow the chain and try
753  // to transform it.
754  SmallPtrSet<Instruction *, 8> AdjacentStores;
755  StoreInst *I = *it;
756 
757  StoreInst *HeadStore = I;
758  unsigned StoreSize = 0;
759 
760  // Collect the chain into a list.
761  while (Tails.count(I) || Heads.count(I)) {
762  if (TransformedStores.count(I))
763  break;
764  AdjacentStores.insert(I);
765 
766  StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
767  // Move to the next value in the chain.
768  I = ConsecutiveChain[I];
769  }
770 
771  Value *StoredVal = HeadStore->getValueOperand();
772  Value *StorePtr = HeadStore->getPointerOperand();
773  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
774  APInt Stride = getStoreStride(StoreEv);
775 
776  // Check to see if the stride matches the size of the stores. If so, then
777  // we know that every byte is touched in the loop.
778  if (StoreSize != Stride && StoreSize != -Stride)
779  continue;
780 
781  bool NegStride = StoreSize == -Stride;
782 
783  if (processLoopStridedStore(StorePtr, StoreSize,
784  MaybeAlign(HeadStore->getAlignment()),
785  StoredVal, HeadStore, AdjacentStores, StoreEv,
786  BECount, NegStride)) {
787  TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
788  Changed = true;
789  }
790  }
791 
792  return Changed;
793 }
794 
795 /// processLoopMemSet - See if this memset can be promoted to a large memset.
796 bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
797  const SCEV *BECount) {
798  // We can only handle non-volatile memsets with a constant size.
799  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
800  return false;
801 
802  // If we're not allowed to hack on memset, we fail.
803  if (!HasMemset)
804  return false;
805 
806  Value *Pointer = MSI->getDest();
807 
808  // See if the pointer expression is an AddRec like {base,+,1} on the current
809  // loop, which indicates a strided store. If we have something else, it's a
810  // random store we can't handle.
811  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
812  if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
813  return false;
814 
815  // Reject memsets that are so large that they overflow an unsigned.
816  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
817  if ((SizeInBytes >> 32) != 0)
818  return false;
819 
820  // Check to see if the stride matches the size of the memset. If so, then we
821  // know that every byte is touched in the loop.
822  const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
823  if (!ConstStride)
824  return false;
825 
826  APInt Stride = ConstStride->getAPInt();
827  if (SizeInBytes != Stride && SizeInBytes != -Stride)
828  return false;
829 
830  // Verify that the memset value is loop invariant. If not, we can't promote
831  // the memset.
832  Value *SplatValue = MSI->getValue();
833  if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
834  return false;
835 
837  MSIs.insert(MSI);
838  bool NegStride = SizeInBytes == -Stride;
839  return processLoopStridedStore(
840  Pointer, (unsigned)SizeInBytes, MaybeAlign(MSI->getDestAlignment()),
841  SplatValue, MSI, MSIs, Ev, BECount, NegStride, /*IsLoopMemset=*/true);
842 }
843 
844 /// mayLoopAccessLocation - Return true if the specified loop might access the
845 /// specified pointer location, which is a loop-strided access. The 'Access'
846 /// argument specifies what the verboten forms of access are (read or write).
847 static bool
849  const SCEV *BECount, unsigned StoreSize,
850  AliasAnalysis &AA,
851  SmallPtrSetImpl<Instruction *> &IgnoredStores) {
852  // Get the location that may be stored across the loop. Since the access is
853  // strided positively through memory, we say that the modified location starts
854  // at the pointer and has infinite size.
856 
857  // If the loop iterates a fixed number of times, we can refine the access size
858  // to be exactly the size of the memset, which is (BECount+1)*StoreSize
859  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
860  AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
861  StoreSize);
862 
863  // TODO: For this to be really effective, we have to dive into the pointer
864  // operand in the store. Store to &A[i] of 100 will always return may alias
865  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
866  // which will then no-alias a store to &A[100].
867  MemoryLocation StoreLoc(Ptr, AccessSize);
868 
869  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
870  ++BI)
871  for (Instruction &I : **BI)
872  if (IgnoredStores.count(&I) == 0 &&
874  intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
875  return true;
876 
877  return false;
878 }
879 
880 // If we have a negative stride, Start refers to the end of the memory location
881 // we're trying to memset. Therefore, we need to recompute the base pointer,
882 // which is just Start - BECount*Size.
883 static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
884  Type *IntPtr, unsigned StoreSize,
885  ScalarEvolution *SE) {
886  const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
887  if (StoreSize != 1)
888  Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
889  SCEV::FlagNUW);
890  return SE->getMinusSCEV(Start, Index);
891 }
892 
893 /// Compute the number of bytes as a SCEV from the backedge taken count.
894 ///
895 /// This also maps the SCEV into the provided type and tries to handle the
896 /// computation in a way that will fold cleanly.
897 static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
898  unsigned StoreSize, Loop *CurLoop,
899  const DataLayout *DL, ScalarEvolution *SE) {
900  const SCEV *NumBytesS;
901  // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
902  // pointer size if it isn't already.
903  //
904  // If we're going to need to zero extend the BE count, check if we can add
905  // one to it prior to zero extending without overflow. Provided this is safe,
906  // it allows better simplification of the +1.
907  if (DL->getTypeSizeInBits(BECount->getType()).getFixedSize() <
908  DL->getTypeSizeInBits(IntPtr).getFixedSize() &&
910  CurLoop, ICmpInst::ICMP_NE, BECount,
911  SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
912  NumBytesS = SE->getZeroExtendExpr(
913  SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
914  IntPtr);
915  } else {
916  NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
917  SE->getOne(IntPtr), SCEV::FlagNUW);
918  }
919 
920  // And scale it based on the store size.
921  if (StoreSize != 1) {
922  NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
923  SCEV::FlagNUW);
924  }
925  return NumBytesS;
926 }
927 
928 /// processLoopStridedStore - We see a strided store of some value. If we can
929 /// transform this into a memset or memset_pattern in the loop preheader, do so.
930 bool LoopIdiomRecognize::processLoopStridedStore(
931  Value *DestPtr, unsigned StoreSize, MaybeAlign StoreAlignment,
932  Value *StoredVal, Instruction *TheStore,
934  const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
935  Value *SplatValue = isBytewiseValue(StoredVal, *DL);
936  Constant *PatternValue = nullptr;
937 
938  if (!SplatValue)
939  PatternValue = getMemSetPatternValue(StoredVal, DL);
940 
941  assert((SplatValue || PatternValue) &&
942  "Expected either splat value or pattern value.");
943 
944  // The trip count of the loop and the base pointer of the addrec SCEV is
945  // guaranteed to be loop invariant, which means that it should dominate the
946  // header. This allows us to insert code for it in the preheader.
947  unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
948  BasicBlock *Preheader = CurLoop->getLoopPreheader();
949  IRBuilder<> Builder(Preheader->getTerminator());
950  SCEVExpander Expander(*SE, *DL, "loop-idiom");
951  SCEVExpanderCleaner ExpCleaner(Expander, *DT);
952 
953  Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
954  Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
955 
956  bool Changed = false;
957  const SCEV *Start = Ev->getStart();
958  // Handle negative strided loops.
959  if (NegStride)
960  Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSize, SE);
961 
962  // TODO: ideally we should still be able to generate memset if SCEV expander
963  // is taught to generate the dependencies at the latest point.
964  if (!isSafeToExpand(Start, *SE))
965  return Changed;
966 
967  // Okay, we have a strided store "p[i]" of a splattable value. We can turn
968  // this into a memset in the loop preheader now if we want. However, this
969  // would be unsafe to do if there is anything else in the loop that may read
970  // or write to the aliased location. Check for any overlap by generating the
971  // base pointer and checking the region.
972  Value *BasePtr =
973  Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
974 
975  // From here on out, conservatively report to the pass manager that we've
976  // changed the IR, even if we later clean up these added instructions. There
977  // may be structural differences e.g. in the order of use lists not accounted
978  // for in just a textual dump of the IR. This is written as a variable, even
979  // though statically all the places this dominates could be replaced with
980  // 'true', with the hope that anyone trying to be clever / "more precise" with
981  // the return value will read this comment, and leave them alone.
982  Changed = true;
983 
984  if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
985  StoreSize, *AA, Stores))
986  return Changed;
987 
988  if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
989  return Changed;
990 
991  // Okay, everything looks good, insert the memset.
992 
993  const SCEV *NumBytesS =
994  getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
995 
996  // TODO: ideally we should still be able to generate memset if SCEV expander
997  // is taught to generate the dependencies at the latest point.
998  if (!isSafeToExpand(NumBytesS, *SE))
999  return Changed;
1000 
1001  Value *NumBytes =
1002  Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
1003 
1004  CallInst *NewCall;
1005  if (SplatValue) {
1006  NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes,
1007  MaybeAlign(StoreAlignment));
1008  } else {
1009  // Everything is emitted in default address space
1010  Type *Int8PtrTy = DestInt8PtrTy;
1011 
1012  Module *M = TheStore->getModule();
1013  StringRef FuncName = "memset_pattern16";
1014  FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
1015  Int8PtrTy, Int8PtrTy, IntIdxTy);
1016  inferLibFuncAttributes(M, FuncName, *TLI);
1017 
1018  // Otherwise we should form a memset_pattern16. PatternValue is known to be
1019  // an constant array of 16-bytes. Plop the value into a mergable global.
1020  GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
1022  PatternValue, ".memset_pattern");
1023  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
1024  GV->setAlignment(Align(16));
1025  Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
1026  NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
1027  }
1028  NewCall->setDebugLoc(TheStore->getDebugLoc());
1029 
1030  if (MSSAU) {
1031  MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1032  NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
1033  MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1034  }
1035 
1036  LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
1037  << " from store to: " << *Ev << " at: " << *TheStore
1038  << "\n");
1039 
1040  ORE.emit([&]() {
1041  return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
1042  NewCall->getDebugLoc(), Preheader)
1043  << "Transformed loop-strided store into a call to "
1044  << ore::NV("NewFunction", NewCall->getCalledFunction())
1045  << "() function";
1046  });
1047 
1048  // Okay, the memset has been formed. Zap the original store and anything that
1049  // feeds into it.
1050  for (auto *I : Stores) {
1051  if (MSSAU)
1052  MSSAU->removeMemoryAccess(I, true);
1054  }
1055  if (MSSAU && VerifyMemorySSA)
1056  MSSAU->getMemorySSA()->verifyMemorySSA();
1057  ++NumMemSet;
1058  ExpCleaner.markResultUsed();
1059  return true;
1060 }
1061 
1062 /// If the stored value is a strided load in the same loop with the same stride
1063 /// this may be transformable into a memcpy. This kicks in for stuff like
1064 /// for (i) A[i] = B[i];
1065 bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
1066  const SCEV *BECount) {
1067  assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
1068 
1069  Value *StorePtr = SI->getPointerOperand();
1070  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1071  APInt Stride = getStoreStride(StoreEv);
1072  unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1073  bool NegStride = StoreSize == -Stride;
1074 
1075  // The store must be feeding a non-volatile load.
1076  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
1077  assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
1078 
1079  // See if the pointer expression is an AddRec like {base,+,1} on the current
1080  // loop, which indicates a strided load. If we have something else, it's a
1081  // random load we can't handle.
1082  const SCEVAddRecExpr *LoadEv =
1083  cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
1084 
1085  // The trip count of the loop and the base pointer of the addrec SCEV is
1086  // guaranteed to be loop invariant, which means that it should dominate the
1087  // header. This allows us to insert code for it in the preheader.
1088  BasicBlock *Preheader = CurLoop->getLoopPreheader();
1089  IRBuilder<> Builder(Preheader->getTerminator());
1090  SCEVExpander Expander(*SE, *DL, "loop-idiom");
1091 
1092  SCEVExpanderCleaner ExpCleaner(Expander, *DT);
1093 
1094  bool Changed = false;
1095  const SCEV *StrStart = StoreEv->getStart();
1096  unsigned StrAS = SI->getPointerAddressSpace();
1097  Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
1098 
1099  // Handle negative strided loops.
1100  if (NegStride)
1101  StrStart = getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSize, SE);
1102 
1103  // Okay, we have a strided store "p[i]" of a loaded value. We can turn
1104  // this into a memcpy in the loop preheader now if we want. However, this
1105  // would be unsafe to do if there is anything else in the loop that may read
1106  // or write the memory region we're storing to. This includes the load that
1107  // feeds the stores. Check for an alias by generating the base address and
1108  // checking everything.
1109  Value *StoreBasePtr = Expander.expandCodeFor(
1110  StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
1111 
1112  // From here on out, conservatively report to the pass manager that we've
1113  // changed the IR, even if we later clean up these added instructions. There
1114  // may be structural differences e.g. in the order of use lists not accounted
1115  // for in just a textual dump of the IR. This is written as a variable, even
1116  // though statically all the places this dominates could be replaced with
1117  // 'true', with the hope that anyone trying to be clever / "more precise" with
1118  // the return value will read this comment, and leave them alone.
1119  Changed = true;
1120 
1122  Stores.insert(SI);
1123  if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
1124  StoreSize, *AA, Stores))
1125  return Changed;
1126 
1127  const SCEV *LdStart = LoadEv->getStart();
1128  unsigned LdAS = LI->getPointerAddressSpace();
1129 
1130  // Handle negative strided loops.
1131  if (NegStride)
1132  LdStart = getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSize, SE);
1133 
1134  // For a memcpy, we have to make sure that the input array is not being
1135  // mutated by the loop.
1136  Value *LoadBasePtr = Expander.expandCodeFor(
1137  LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
1138 
1139  if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
1140  StoreSize, *AA, Stores))
1141  return Changed;
1142 
1143  if (avoidLIRForMultiBlockLoop())
1144  return Changed;
1145 
1146  // Okay, everything is safe, we can transform this!
1147 
1148  const SCEV *NumBytesS =
1149  getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
1150 
1151  Value *NumBytes =
1152  Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
1153 
1154  CallInst *NewCall = nullptr;
1155  // Check whether to generate an unordered atomic memcpy:
1156  // If the load or store are atomic, then they must necessarily be unordered
1157  // by previous checks.
1158  if (!SI->isAtomic() && !LI->isAtomic())
1159  NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
1160  LI->getAlign(), NumBytes);
1161  else {
1162  // We cannot allow unaligned ops for unordered load/store, so reject
1163  // anything where the alignment isn't at least the element size.
1164  const Align StoreAlign = SI->getAlign();
1165  const Align LoadAlign = LI->getAlign();
1166  if (StoreAlign < StoreSize || LoadAlign < StoreSize)
1167  return Changed;
1168 
1169  // If the element.atomic memcpy is not lowered into explicit
1170  // loads/stores later, then it will be lowered into an element-size
1171  // specific lib call. If the lib call doesn't exist for our store size, then
1172  // we shouldn't generate the memcpy.
1173  if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
1174  return Changed;
1175 
1176  // Create the call.
1177  // Note that unordered atomic loads/stores are *required* by the spec to
1178  // have an alignment but non-atomic loads/stores may not.
1179  NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1180  StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
1181  StoreSize);
1182  }
1183  NewCall->setDebugLoc(SI->getDebugLoc());
1184 
1185  if (MSSAU) {
1186  MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1187  NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
1188  MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1189  }
1190 
1191  LLVM_DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
1192  << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
1193  << " from store ptr=" << *StoreEv << " at: " << *SI
1194  << "\n");
1195 
1196  ORE.emit([&]() {
1197  return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
1198  NewCall->getDebugLoc(), Preheader)
1199  << "Formed a call to "
1200  << ore::NV("NewFunction", NewCall->getCalledFunction())
1201  << "() function";
1202  });
1203 
1204  // Okay, the memcpy has been formed. Zap the original store and anything that
1205  // feeds into it.
1206  if (MSSAU)
1207  MSSAU->removeMemoryAccess(SI, true);
1209  if (MSSAU && VerifyMemorySSA)
1210  MSSAU->getMemorySSA()->verifyMemorySSA();
1211  ++NumMemCpy;
1212  ExpCleaner.markResultUsed();
1213  return true;
1214 }
1215 
1216 // When compiling for codesize we avoid idiom recognition for a multi-block loop
1217 // unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
1218 //
1219 bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
1220  bool IsLoopMemset) {
1221  if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
1222  if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) {
1223  LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
1224  << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
1225  << " avoided: multi-block top-level loop\n");
1226  return true;
1227  }
1228  }
1229 
1230  return false;
1231 }
1232 
1233 bool LoopIdiomRecognize::runOnNoncountableLoop() {
1234  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
1235  << CurLoop->getHeader()->getParent()->getName()
1236  << "] Noncountable Loop %"
1237  << CurLoop->getHeader()->getName() << "\n");
1238 
1239  return recognizePopcount() || recognizeAndInsertFFS() ||
1240  recognizeShiftUntilBitTest();
1241 }
1242 
1243 /// Check if the given conditional branch is based on the comparison between
1244 /// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
1245 /// true), the control yields to the loop entry. If the branch matches the
1246 /// behavior, the variable involved in the comparison is returned. This function
1247 /// will be called to see if the precondition and postcondition of the loop are
1248 /// in desirable form.
1249 static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
1250  bool JmpOnZero = false) {
1251  if (!BI || !BI->isConditional())
1252  return nullptr;
1253 
1254  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1255  if (!Cond)
1256  return nullptr;
1257 
1258  ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1259  if (!CmpZero || !CmpZero->isZero())
1260  return nullptr;
1261 
1262  BasicBlock *TrueSucc = BI->getSuccessor(0);
1263  BasicBlock *FalseSucc = BI->getSuccessor(1);
1264  if (JmpOnZero)
1265  std::swap(TrueSucc, FalseSucc);
1266 
1267  ICmpInst::Predicate Pred = Cond->getPredicate();
1268  if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
1269  (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
1270  return Cond->getOperand(0);
1271 
1272  return nullptr;
1273 }
1274 
1275 // Check if the recurrence variable `VarX` is in the right form to create
1276 // the idiom. Returns the value coerced to a PHINode if so.
1278  BasicBlock *LoopEntry) {
1279  auto *PhiX = dyn_cast<PHINode>(VarX);
1280  if (PhiX && PhiX->getParent() == LoopEntry &&
1281  (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
1282  return PhiX;
1283  return nullptr;
1284 }
1285 
1286 /// Return true iff the idiom is detected in the loop.
1287 ///
1288 /// Additionally:
1289 /// 1) \p CntInst is set to the instruction counting the population bit.
1290 /// 2) \p CntPhi is set to the corresponding phi node.
1291 /// 3) \p Var is set to the value whose population bits are being counted.
1292 ///
1293 /// The core idiom we are trying to detect is:
1294 /// \code
1295 /// if (x0 != 0)
1296 /// goto loop-exit // the precondition of the loop
1297 /// cnt0 = init-val;
1298 /// do {
1299 /// x1 = phi (x0, x2);
1300 /// cnt1 = phi(cnt0, cnt2);
1301 ///
1302 /// cnt2 = cnt1 + 1;
1303 /// ...
1304 /// x2 = x1 & (x1 - 1);
1305 /// ...
1306 /// } while(x != 0);
1307 ///
1308 /// loop-exit:
1309 /// \endcode
1310 static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
1311  Instruction *&CntInst, PHINode *&CntPhi,
1312  Value *&Var) {
1313  // step 1: Check to see if the look-back branch match this pattern:
1314  // "if (a!=0) goto loop-entry".
1315  BasicBlock *LoopEntry;
1316  Instruction *DefX2, *CountInst;
1317  Value *VarX1, *VarX0;
1318  PHINode *PhiX, *CountPhi;
1319 
1320  DefX2 = CountInst = nullptr;
1321  VarX1 = VarX0 = nullptr;
1322  PhiX = CountPhi = nullptr;
1323  LoopEntry = *(CurLoop->block_begin());
1324 
1325  // step 1: Check if the loop-back branch is in desirable form.
1326  {
1327  if (Value *T = matchCondition(
1328  dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1329  DefX2 = dyn_cast<Instruction>(T);
1330  else
1331  return false;
1332  }
1333 
1334  // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
1335  {
1336  if (!DefX2 || DefX2->getOpcode() != Instruction::And)
1337  return false;
1338 
1339  BinaryOperator *SubOneOp;
1340 
1341  if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
1342  VarX1 = DefX2->getOperand(1);
1343  else {
1344  VarX1 = DefX2->getOperand(0);
1345  SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
1346  }
1347  if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
1348  return false;
1349 
1350  ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
1351  if (!Dec ||
1352  !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
1353  (SubOneOp->getOpcode() == Instruction::Add &&
1354  Dec->isMinusOne()))) {
1355  return false;
1356  }
1357  }
1358 
1359  // step 3: Check the recurrence of variable X
1360  PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
1361  if (!PhiX)
1362  return false;
1363 
1364  // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
1365  {
1366  CountInst = nullptr;
1367  for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
1368  IterE = LoopEntry->end();
1369  Iter != IterE; Iter++) {
1370  Instruction *Inst = &*Iter;
1371  if (Inst->getOpcode() != Instruction::Add)
1372  continue;
1373 
1374  ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
1375  if (!Inc || !Inc->isOne())
1376  continue;
1377 
1378  PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
1379  if (!Phi)
1380  continue;
1381 
1382  // Check if the result of the instruction is live of the loop.
1383  bool LiveOutLoop = false;
1384  for (User *U : Inst->users()) {
1385  if ((cast<Instruction>(U))->getParent() != LoopEntry) {
1386  LiveOutLoop = true;
1387  break;
1388  }
1389  }
1390 
1391  if (LiveOutLoop) {
1392  CountInst = Inst;
1393  CountPhi = Phi;
1394  break;
1395  }
1396  }
1397 
1398  if (!CountInst)
1399  return false;
1400  }
1401 
1402  // step 5: check if the precondition is in this form:
1403  // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
1404  {
1405  auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1406  Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
1407  if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
1408  return false;
1409 
1410  CntInst = CountInst;
1411  CntPhi = CountPhi;
1412  Var = T;
1413  }
1414 
1415  return true;
1416 }
1417 
1418 /// Return true if the idiom is detected in the loop.
1419 ///
1420 /// Additionally:
1421 /// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1422 /// or nullptr if there is no such.
1423 /// 2) \p CntPhi is set to the corresponding phi node
1424 /// or nullptr if there is no such.
1425 /// 3) \p Var is set to the value whose CTLZ could be used.
1426 /// 4) \p DefX is set to the instruction calculating Loop exit condition.
1427 ///
1428 /// The core idiom we are trying to detect is:
1429 /// \code
1430 /// if (x0 == 0)
1431 /// goto loop-exit // the precondition of the loop
1432 /// cnt0 = init-val;
1433 /// do {
1434 /// x = phi (x0, x.next); //PhiX
1435 /// cnt = phi(cnt0, cnt.next);
1436 ///
1437 /// cnt.next = cnt + 1;
1438 /// ...
1439 /// x.next = x >> 1; // DefX
1440 /// ...
1441 /// } while(x.next != 0);
1442 ///
1443 /// loop-exit:
1444 /// \endcode
1445 static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
1446  Intrinsic::ID &IntrinID, Value *&InitX,
1447  Instruction *&CntInst, PHINode *&CntPhi,
1448  Instruction *&DefX) {
1449  BasicBlock *LoopEntry;
1450  Value *VarX = nullptr;
1451 
1452  DefX = nullptr;
1453  CntInst = nullptr;
1454  CntPhi = nullptr;
1455  LoopEntry = *(CurLoop->block_begin());
1456 
1457  // step 1: Check if the loop-back branch is in desirable form.
1458  if (Value *T = matchCondition(
1459  dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1460  DefX = dyn_cast<Instruction>(T);
1461  else
1462  return false;
1463 
1464  // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
1465  if (!DefX || !DefX->isShift())
1466  return false;
1467  IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
1468  Intrinsic::ctlz;
1469  ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
1470  if (!Shft || !Shft->isOne())
1471  return false;
1472  VarX = DefX->getOperand(0);
1473 
1474  // step 3: Check the recurrence of variable X
1475  PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
1476  if (!PhiX)
1477  return false;
1478 
1479  InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
1480 
1481  // Make sure the initial value can't be negative otherwise the ashr in the
1482  // loop might never reach zero which would make the loop infinite.
1483  if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
1484  return false;
1485 
1486  // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
1487  // or cnt.next = cnt + -1.
1488  // TODO: We can skip the step. If loop trip count is known (CTLZ),
1489  // then all uses of "cnt.next" could be optimized to the trip count
1490  // plus "cnt0". Currently it is not optimized.
1491  // This step could be used to detect POPCNT instruction:
1492  // cnt.next = cnt + (x.next & 1)
1493  for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
1494  IterE = LoopEntry->end();
1495  Iter != IterE; Iter++) {
1496  Instruction *Inst = &*Iter;
1497  if (Inst->getOpcode() != Instruction::Add)
1498  continue;
1499 
1500  ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
1501  if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
1502  continue;
1503 
1504  PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
1505  if (!Phi)
1506  continue;
1507 
1508  CntInst = Inst;
1509  CntPhi = Phi;
1510  break;
1511  }
1512  if (!CntInst)
1513  return false;
1514 
1515  return true;
1516 }
1517 
1518 /// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1519 /// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1520 /// trip count returns true; otherwise, returns false.
1521 bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1522  // Give up if the loop has multiple blocks or multiple backedges.
1523  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1524  return false;
1525 
1526  Intrinsic::ID IntrinID;
1527  Value *InitX;
1528  Instruction *DefX = nullptr;
1529  PHINode *CntPhi = nullptr;
1530  Instruction *CntInst = nullptr;
1531  // Help decide if transformation is profitable. For ShiftUntilZero idiom,
1532  // this is always 6.
1533  size_t IdiomCanonicalSize = 6;
1534 
1535  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
1536  CntInst, CntPhi, DefX))
1537  return false;
1538 
1539  bool IsCntPhiUsedOutsideLoop = false;
1540  for (User *U : CntPhi->users())
1541  if (!CurLoop->contains(cast<Instruction>(U))) {
1542  IsCntPhiUsedOutsideLoop = true;
1543  break;
1544  }
1545  bool IsCntInstUsedOutsideLoop = false;
1546  for (User *U : CntInst->users())
1547  if (!CurLoop->contains(cast<Instruction>(U))) {
1548  IsCntInstUsedOutsideLoop = true;
1549  break;
1550  }
1551  // If both CntInst and CntPhi are used outside the loop the profitability
1552  // is questionable.
1553  if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1554  return false;
1555 
1556  // For some CPUs result of CTLZ(X) intrinsic is undefined
1557  // when X is 0. If we can not guarantee X != 0, we need to check this
1558  // when expand.
1559  bool ZeroCheck = false;
1560  // It is safe to assume Preheader exist as it was checked in
1561  // parent function RunOnLoop.
1562  BasicBlock *PH = CurLoop->getLoopPreheader();
1563 
1564  // If we are using the count instruction outside the loop, make sure we
1565  // have a zero check as a precondition. Without the check the loop would run
1566  // one iteration for before any check of the input value. This means 0 and 1
1567  // would have identical behavior in the original loop and thus
1568  if (!IsCntPhiUsedOutsideLoop) {
1569  auto *PreCondBB = PH->getSinglePredecessor();
1570  if (!PreCondBB)
1571  return false;
1572  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1573  if (!PreCondBI)
1574  return false;
1575  if (matchCondition(PreCondBI, PH) != InitX)
1576  return false;
1577  ZeroCheck = true;
1578  }
1579 
1580  // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1581  // profitable if we delete the loop.
1582 
1583  // the loop has only 6 instructions:
1584  // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1585  // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1586  // %shr = ashr %n.addr.0, 1
1587  // %tobool = icmp eq %shr, 0
1588  // %inc = add nsw %i.0, 1
1589  // br i1 %tobool
1590 
1591  const Value *Args[] = {InitX,
1592  ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
1593 
1594  // @llvm.dbg doesn't count as they have no semantic effect.
1595  auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
1597  std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1598 
1599  IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
1600  InstructionCost Cost =
1602  if (HeaderSize != IdiomCanonicalSize &&
1604  return false;
1605 
1606  transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1607  DefX->getDebugLoc(), ZeroCheck,
1608  IsCntPhiUsedOutsideLoop);
1609  return true;
1610 }
1611 
1612 /// Recognizes a population count idiom in a non-countable loop.
1613 ///
1614 /// If detected, transforms the relevant code to issue the popcount intrinsic
1615 /// function call, and returns true; otherwise, returns false.
1616 bool LoopIdiomRecognize::recognizePopcount() {
1618  return false;
1619 
1620  // Counting population are usually conducted by few arithmetic instructions.
1621  // Such instructions can be easily "absorbed" by vacant slots in a
1622  // non-compact loop. Therefore, recognizing popcount idiom only makes sense
1623  // in a compact loop.
1624 
1625  // Give up if the loop has multiple blocks or multiple backedges.
1626  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1627  return false;
1628 
1629  BasicBlock *LoopBody = *(CurLoop->block_begin());
1630  if (LoopBody->size() >= 20) {
1631  // The loop is too big, bail out.
1632  return false;
1633  }
1634 
1635  // It should have a preheader containing nothing but an unconditional branch.
1636  BasicBlock *PH = CurLoop->getLoopPreheader();
1637  if (!PH || &PH->front() != PH->getTerminator())
1638  return false;
1639  auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
1640  if (!EntryBI || EntryBI->isConditional())
1641  return false;
1642 
1643  // It should have a precondition block where the generated popcount intrinsic
1644  // function can be inserted.
1645  auto *PreCondBB = PH->getSinglePredecessor();
1646  if (!PreCondBB)
1647  return false;
1648  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1649  if (!PreCondBI || PreCondBI->isUnconditional())
1650  return false;
1651 
1652  Instruction *CntInst;
1653  PHINode *CntPhi;
1654  Value *Val;
1655  if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
1656  return false;
1657 
1658  transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
1659  return true;
1660 }
1661 
1663  const DebugLoc &DL) {
1664  Value *Ops[] = {Val};
1665  Type *Tys[] = {Val->getType()};
1666 
1668  Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
1669  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1670  CI->setDebugLoc(DL);
1671 
1672  return CI;
1673 }
1674 
1676  const DebugLoc &DL, bool ZeroCheck,
1677  Intrinsic::ID IID) {
1678  Value *Ops[] = {Val, IRBuilder.getInt1(ZeroCheck)};
1679  Type *Tys[] = {Val->getType()};
1680 
1682  Function *Func = Intrinsic::getDeclaration(M, IID, Tys);
1683  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1684  CI->setDebugLoc(DL);
1685 
1686  return CI;
1687 }
1688 
1689 /// Transform the following loop (Using CTLZ, CTTZ is similar):
1690 /// loop:
1691 /// CntPhi = PHI [Cnt0, CntInst]
1692 /// PhiX = PHI [InitX, DefX]
1693 /// CntInst = CntPhi + 1
1694 /// DefX = PhiX >> 1
1695 /// LOOP_BODY
1696 /// Br: loop if (DefX != 0)
1697 /// Use(CntPhi) or Use(CntInst)
1698 ///
1699 /// Into:
1700 /// If CntPhi used outside the loop:
1701 /// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
1702 /// Count = CountPrev + 1
1703 /// else
1704 /// Count = BitWidth(InitX) - CTLZ(InitX)
1705 /// loop:
1706 /// CntPhi = PHI [Cnt0, CntInst]
1707 /// PhiX = PHI [InitX, DefX]
1708 /// PhiCount = PHI [Count, Dec]
1709 /// CntInst = CntPhi + 1
1710 /// DefX = PhiX >> 1
1711 /// Dec = PhiCount - 1
1712 /// LOOP_BODY
1713 /// Br: loop if (Dec != 0)
1714 /// Use(CountPrev + Cnt0) // Use(CntPhi)
1715 /// or
1716 /// Use(Count + Cnt0) // Use(CntInst)
1717 ///
1718 /// If LOOP_BODY is empty the loop will be deleted.
1719 /// If CntInst and DefX are not used in LOOP_BODY they will be removed.
1720 void LoopIdiomRecognize::transformLoopToCountable(
1721  Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
1722  PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
1723  bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
1724  BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
1725 
1726  // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
1727  IRBuilder<> Builder(PreheaderBr);
1728  Builder.SetCurrentDebugLocation(DL);
1729 
1730  // If there are no uses of CntPhi crate:
1731  // Count = BitWidth - CTLZ(InitX);
1732  // NewCount = Count;
1733  // If there are uses of CntPhi create:
1734  // NewCount = BitWidth - CTLZ(InitX >> 1);
1735  // Count = NewCount + 1;
1736  Value *InitXNext;
1737  if (IsCntPhiUsedOutsideLoop) {
1738  if (DefX->getOpcode() == Instruction::AShr)
1739  InitXNext = Builder.CreateAShr(InitX, 1);
1740  else if (DefX->getOpcode() == Instruction::LShr)
1741  InitXNext = Builder.CreateLShr(InitX, 1);
1742  else if (DefX->getOpcode() == Instruction::Shl) // cttz
1743  InitXNext = Builder.CreateShl(InitX, 1);
1744  else
1745  llvm_unreachable("Unexpected opcode!");
1746  } else
1747  InitXNext = InitX;
1748  Value *Count =
1749  createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
1750  Type *CountTy = Count->getType();
1751  Count = Builder.CreateSub(
1752  ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
1753  Value *NewCount = Count;
1754  if (IsCntPhiUsedOutsideLoop)
1755  Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
1756 
1757  NewCount = Builder.CreateZExtOrTrunc(NewCount, CntInst->getType());
1758 
1759  Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
1760  if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
1761  // If the counter was being incremented in the loop, add NewCount to the
1762  // counter's initial value, but only if the initial value is not zero.
1763  ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1764  if (!InitConst || !InitConst->isZero())
1765  NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1766  } else {
1767  // If the count was being decremented in the loop, subtract NewCount from
1768  // the counter's initial value.
1769  NewCount = Builder.CreateSub(CntInitVal, NewCount);
1770  }
1771 
1772  // Step 2: Insert new IV and loop condition:
1773  // loop:
1774  // ...
1775  // PhiCount = PHI [Count, Dec]
1776  // ...
1777  // Dec = PhiCount - 1
1778  // ...
1779  // Br: loop if (Dec != 0)
1780  BasicBlock *Body = *(CurLoop->block_begin());
1781  auto *LbBr = cast<BranchInst>(Body->getTerminator());
1782  ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1783 
1784  PHINode *TcPhi = PHINode::Create(CountTy, 2, "tcphi", &Body->front());
1785 
1786  Builder.SetInsertPoint(LbCond);
1787  Instruction *TcDec = cast<Instruction>(Builder.CreateSub(
1788  TcPhi, ConstantInt::get(CountTy, 1), "tcdec", false, true));
1789 
1790  TcPhi->addIncoming(Count, Preheader);
1791  TcPhi->addIncoming(TcDec, Body);
1792 
1793  CmpInst::Predicate Pred =
1794  (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
1795  LbCond->setPredicate(Pred);
1796  LbCond->setOperand(0, TcDec);
1797  LbCond->setOperand(1, ConstantInt::get(CountTy, 0));
1798 
1799  // Step 3: All the references to the original counter outside
1800  // the loop are replaced with the NewCount
1801  if (IsCntPhiUsedOutsideLoop)
1802  CntPhi->replaceUsesOutsideBlock(NewCount, Body);
1803  else
1804  CntInst->replaceUsesOutsideBlock(NewCount, Body);
1805 
1806  // step 4: Forget the "non-computable" trip-count SCEV associated with the
1807  // loop. The loop would otherwise not be deleted even if it becomes empty.
1808  SE->forgetLoop(CurLoop);
1809 }
1810 
1811 void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
1812  Instruction *CntInst,
1813  PHINode *CntPhi, Value *Var) {
1814  BasicBlock *PreHead = CurLoop->getLoopPreheader();
1815  auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
1816  const DebugLoc &DL = CntInst->getDebugLoc();
1817 
1818  // Assuming before transformation, the loop is following:
1819  // if (x) // the precondition
1820  // do { cnt++; x &= x - 1; } while(x);
1821 
1822  // Step 1: Insert the ctpop instruction at the end of the precondition block
1823  IRBuilder<> Builder(PreCondBr);
1824  Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
1825  {
1826  PopCnt = createPopcntIntrinsic(Builder, Var, DL);
1827  NewCount = PopCntZext =
1828  Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
1829 
1830  if (NewCount != PopCnt)
1831  (cast<Instruction>(NewCount))->setDebugLoc(DL);
1832 
1833  // TripCnt is exactly the number of iterations the loop has
1834  TripCnt = NewCount;
1835 
1836  // If the population counter's initial value is not zero, insert Add Inst.
1837  Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
1838  ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1839  if (!InitConst || !InitConst->isZero()) {
1840  NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1841  (cast<Instruction>(NewCount))->setDebugLoc(DL);
1842  }
1843  }
1844 
1845  // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
1846  // "if (NewCount == 0) loop-exit". Without this change, the intrinsic
1847  // function would be partial dead code, and downstream passes will drag
1848  // it back from the precondition block to the preheader.
1849  {
1850  ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
1851 
1852  Value *Opnd0 = PopCntZext;
1853  Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
1854  if (PreCond->getOperand(0) != Var)
1855  std::swap(Opnd0, Opnd1);
1856 
1857  ICmpInst *NewPreCond = cast<ICmpInst>(
1858  Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
1859  PreCondBr->setCondition(NewPreCond);
1860 
1862  }
1863 
1864  // Step 3: Note that the population count is exactly the trip count of the
1865  // loop in question, which enable us to convert the loop from noncountable
1866  // loop into a countable one. The benefit is twofold:
1867  //
1868  // - If the loop only counts population, the entire loop becomes dead after
1869  // the transformation. It is a lot easier to prove a countable loop dead
1870  // than to prove a noncountable one. (In some C dialects, an infinite loop
1871  // isn't dead even if it computes nothing useful. In general, DCE needs
1872  // to prove a noncountable loop finite before safely delete it.)
1873  //
1874  // - If the loop also performs something else, it remains alive.
1875  // Since it is transformed to countable form, it can be aggressively
1876  // optimized by some optimizations which are in general not applicable
1877  // to a noncountable loop.
1878  //
1879  // After this step, this loop (conceptually) would look like following:
1880  // newcnt = __builtin_ctpop(x);
1881  // t = newcnt;
1882  // if (x)
1883  // do { cnt++; x &= x-1; t--) } while (t > 0);
1884  BasicBlock *Body = *(CurLoop->block_begin());
1885  {
1886  auto *LbBr = cast<BranchInst>(Body->getTerminator());
1887  ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1888  Type *Ty = TripCnt->getType();
1889 
1890  PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1891 
1892  Builder.SetInsertPoint(LbCond);
1893  Instruction *TcDec = cast<Instruction>(
1894  Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1895  "tcdec", false, true));
1896 
1897  TcPhi->addIncoming(TripCnt, PreHead);
1898  TcPhi->addIncoming(TcDec, Body);
1899 
1900  CmpInst::Predicate Pred =
1901  (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
1902  LbCond->setPredicate(Pred);
1903  LbCond->setOperand(0, TcDec);
1904  LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1905  }
1906 
1907  // Step 4: All the references to the original population counter outside
1908  // the loop are replaced with the NewCount -- the value returned from
1909  // __builtin_ctpop().
1910  CntInst->replaceUsesOutsideBlock(NewCount, Body);
1911 
1912  // step 5: Forget the "non-computable" trip-count SCEV associated with the
1913  // loop. The loop would otherwise not be deleted even if it becomes empty.
1914  SE->forgetLoop(CurLoop);
1915 }
1916 
1917 /// Match loop-invariant value.
1918 template <typename SubPattern_t> struct match_LoopInvariant {
1919  SubPattern_t SubPattern;
1920  const Loop *L;
1921 
1922  match_LoopInvariant(const SubPattern_t &SP, const Loop *L)
1923  : SubPattern(SP), L(L) {}
1924 
1925  template <typename ITy> bool match(ITy *V) {
1926  return L->isLoopInvariant(V) && SubPattern.match(V);
1927  }
1928 };
1929 
1930 /// Matches if the value is loop-invariant.
1931 template <typename Ty>
1932 inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
1933  return match_LoopInvariant<Ty>(M, L);
1934 }
1935 
1936 /// Return true if the idiom is detected in the loop.
1937 ///
1938 /// The core idiom we are trying to detect is:
1939 /// \code
1940 /// entry:
1941 /// <...>
1942 /// %bitmask = shl i32 1, %bitpos
1943 /// br label %loop
1944 ///
1945 /// loop:
1946 /// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
1947 /// %x.curr.bitmasked = and i32 %x.curr, %bitmask
1948 /// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
1949 /// %x.next = shl i32 %x.curr, 1
1950 /// <...>
1951 /// br i1 %x.curr.isbitunset, label %loop, label %end
1952 ///
1953 /// end:
1954 /// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
1955 /// %x.next.res = phi i32 [ %x.next, %loop ] <...>
1956 /// <...>
1957 /// \endcode
1958 static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
1959  Value *&BitMask, Value *&BitPos,
1960  Value *&CurrX, Instruction *&NextX) {
1962  " Performing shift-until-bittest idiom detection.\n");
1963 
1964  // Give up if the loop has multiple blocks or multiple backedges.
1965  if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
1966  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
1967  return false;
1968  }
1969 
1970  BasicBlock *LoopHeaderBB = CurLoop->getHeader();
1971  BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
1972  assert(LoopPreheaderBB && "There is always a loop preheader.");
1973 
1974  using namespace PatternMatch;
1975 
1976  // Step 1: Check if the loop backedge is in desirable form.
1977 
1978  ICmpInst::Predicate Pred;
1979  Value *CmpLHS, *CmpRHS;
1980  BasicBlock *TrueBB, *FalseBB;
1981  if (!match(LoopHeaderBB->getTerminator(),
1982  m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
1983  m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) {
1984  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
1985  return false;
1986  }
1987 
1988  // Step 2: Check if the backedge's condition is in desirable form.
1989 
1990  auto MatchVariableBitMask = [&]() {
1991  return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
1992  match(CmpLHS,
1993  m_c_And(m_Value(CurrX),
1994  m_CombineAnd(
1995  m_Value(BitMask),
1996  m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)),
1997  CurLoop))));
1998  };
1999  auto MatchConstantBitMask = [&]() {
2000  return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
2001  match(CmpLHS, m_And(m_Value(CurrX),
2002  m_CombineAnd(m_Value(BitMask), m_Power2()))) &&
2003  (BitPos = ConstantExpr::getExactLogBase2(cast<Constant>(BitMask)));
2004  };
2005  auto MatchDecomposableConstantBitMask = [&]() {
2006  APInt Mask;
2007  return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) &&
2008  ICmpInst::isEquality(Pred) && Mask.isPowerOf2() &&
2009  (BitMask = ConstantInt::get(CurrX->getType(), Mask)) &&
2010  (BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2()));
2011  };
2012 
2013  if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
2014  !MatchDecomposableConstantBitMask()) {
2015  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n");
2016  return false;
2017  }
2018 
2019  // Step 3: Check if the recurrence is in desirable form.
2020  auto *CurrXPN = dyn_cast<PHINode>(CurrX);
2021  if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
2022  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
2023  return false;
2024  }
2025 
2026  BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
2027  NextX =
2028  dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
2029 
2030  if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
2031  // FIXME: support right-shift?
2032  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
2033  return false;
2034  }
2035 
2036  // Step 4: Check if the backedge's destinations are in desirable form.
2037 
2038  assert(ICmpInst::isEquality(Pred) &&
2039  "Should only get equality predicates here.");
2040 
2041  // cmp-br is commutative, so canonicalize to a single variant.
2042  if (Pred != ICmpInst::Predicate::ICMP_EQ) {
2043  Pred = ICmpInst::getInversePredicate(Pred);
2044  std::swap(TrueBB, FalseBB);
2045  }
2046 
2047  // We expect to exit loop when comparison yields false,
2048  // so when it yields true we should branch back to loop header.
2049  if (TrueBB != LoopHeaderBB) {
2050  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
2051  return false;
2052  }
2053 
2054  // Okay, idiom checks out.
2055  return true;
2056 }
2057 
2058 /// Look for the following loop:
2059 /// \code
2060 /// entry:
2061 /// <...>
2062 /// %bitmask = shl i32 1, %bitpos
2063 /// br label %loop
2064 ///
2065 /// loop:
2066 /// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
2067 /// %x.curr.bitmasked = and i32 %x.curr, %bitmask
2068 /// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
2069 /// %x.next = shl i32 %x.curr, 1
2070 /// <...>
2071 /// br i1 %x.curr.isbitunset, label %loop, label %end
2072 ///
2073 /// end:
2074 /// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
2075 /// %x.next.res = phi i32 [ %x.next, %loop ] <...>
2076 /// <...>
2077 /// \endcode
2078 ///
2079 /// And transform it into:
2080 /// \code
2081 /// entry:
2082 /// %bitmask = shl i32 1, %bitpos
2083 /// %lowbitmask = add i32 %bitmask, -1
2084 /// %mask = or i32 %lowbitmask, %bitmask
2085 /// %x.masked = and i32 %x, %mask
2086 /// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked,
2087 /// i1 true)
2088 /// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros
2089 /// %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1
2090 /// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos
2091 /// %tripcount = add i32 %backedgetakencount, 1
2092 /// %x.curr = shl i32 %x, %backedgetakencount
2093 /// %x.next = shl i32 %x, %tripcount
2094 /// br label %loop
2095 ///
2096 /// loop:
2097 /// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ]
2098 /// %loop.iv.next = add nuw i32 %loop.iv, 1
2099 /// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount
2100 /// <...>
2101 /// br i1 %loop.ivcheck, label %end, label %loop
2102 ///
2103 /// end:
2104 /// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
2105 /// %x.next.res = phi i32 [ %x.next, %loop ] <...>
2106 /// <...>
2107 /// \endcode
2108 bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
2109  bool MadeChange = false;
2110 
2111  Value *X, *BitMask, *BitPos, *XCurr;
2112  Instruction *XNext;
2113  if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
2114  XNext)) {
2116  " shift-until-bittest idiom detection failed.\n");
2117  return MadeChange;
2118  }
2119  LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n");
2120 
2121  // Ok, it is the idiom we were looking for, we *could* transform this loop,
2122  // but is it profitable to transform?
2123 
2124  BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2125  BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2126  assert(LoopPreheaderBB && "There is always a loop preheader.");
2127 
2128  BasicBlock *SuccessorBB = CurLoop->getExitBlock();
2129  assert(LoopPreheaderBB && "There is only a single successor.");
2130 
2131  IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
2132  Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc());
2133 
2134  Intrinsic::ID IntrID = Intrinsic::ctlz;
2135  Type *Ty = X->getType();
2136  unsigned Bitwidth = Ty->getScalarSizeInBits();
2137 
2140 
2141  // The rewrite is considered to be unprofitable iff and only iff the
2142  // intrinsic/shift we'll use are not cheap. Note that we are okay with *just*
2143  // making the loop countable, even if nothing else changes.
2145  IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getTrue()});
2147  if (Cost > TargetTransformInfo::TCC_Basic) {
2149  " Intrinsic is too costly, not beneficial\n");
2150  return MadeChange;
2151  }
2152  if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) >
2154  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n");
2155  return MadeChange;
2156  }
2157 
2158  // Ok, transform appears worthwhile.
2159  MadeChange = true;
2160 
2161  // Step 1: Compute the loop trip count.
2162 
2163  Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
2164  BitPos->getName() + ".lowbitmask");
2165  Value *Mask =
2166  Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
2167  Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
2168  CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
2169  IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()},
2170  /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
2171  Value *XMaskedNumActiveBits = Builder.CreateSub(
2172  ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
2173  XMasked->getName() + ".numactivebits", /*HasNUW=*/true,
2174  /*HasNSW=*/Bitwidth != 2);
2175  Value *XMaskedLeadingOnePos =
2176  Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty),
2177  XMasked->getName() + ".leadingonepos", /*HasNUW=*/false,
2178  /*HasNSW=*/Bitwidth > 2);
2179 
2180  Value *LoopBackedgeTakenCount = Builder.CreateSub(
2181  BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount",
2182  /*HasNUW=*/true, /*HasNSW=*/true);
2183  // We know loop's backedge-taken count, but what's loop's trip count?
2184  // Note that while NUW is always safe, while NSW is only for bitwidths != 2.
2185  Value *LoopTripCount =
2186  Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
2187  CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
2188  /*HasNSW=*/Bitwidth != 2);
2189 
2190  // Step 2: Compute the recurrence's final value without a loop.
2191 
2192  // NewX is always safe to compute, because `LoopBackedgeTakenCount`
2193  // will always be smaller than `bitwidth(X)`, i.e. we never get poison.
2194  Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
2195  NewX->takeName(XCurr);
2196  if (auto *I = dyn_cast<Instruction>(NewX))
2197  I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
2198 
2199  Value *NewXNext;
2200  // Rewriting XNext is more complicated, however, because `X << LoopTripCount`
2201  // will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
2202  // iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know
2203  // that isn't the case, we'll need to emit an alternative, safe IR.
2204  if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() ||
2208  Ty->getScalarSizeInBits() - 1))))
2209  NewXNext = Builder.CreateShl(X, LoopTripCount);
2210  else {
2211  // Otherwise, just additionally shift by one. It's the smallest solution,
2212  // alternatively, we could check that NewX is INT_MIN (or BitPos is )
2213  // and select 0 instead.
2214  NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
2215  }
2216 
2217  NewXNext->takeName(XNext);
2218  if (auto *I = dyn_cast<Instruction>(NewXNext))
2219  I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
2220 
2221  // Step 3: Adjust the successor basic block to recieve the computed
2222  // recurrence's final value instead of the recurrence itself.
2223 
2224  XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB);
2225  XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB);
2226 
2227  // Step 4: Rewrite the loop into a countable form, with canonical IV.
2228 
2229  // The new canonical induction variable.
2230  Builder.SetInsertPoint(&LoopHeaderBB->front());
2231  auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
2232 
2233  // The induction itself.
2234  // Note that while NUW is always safe, while NSW is only for bitwidths != 2.
2235  Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
2236  auto *IVNext =
2237  Builder.CreateAdd(IV, ConstantInt::get(Ty, 1), IV->getName() + ".next",
2238  /*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
2239 
2240  // The loop trip count check.
2241  auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
2242  CurLoop->getName() + ".ivcheck");
2243  Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
2244  LoopHeaderBB->getTerminator()->eraseFromParent();
2245 
2246  // Populate the IV PHI.
2247  IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
2248  IV->addIncoming(IVNext, LoopHeaderBB);
2249 
2250  // Step 5: Forget the "non-computable" trip-count SCEV associated with the
2251  // loop. The loop would otherwise not be deleted even if it becomes empty.
2252 
2253  SE->forgetLoop(CurLoop);
2254 
2255  // Other passes will take care of actually deleting the loop if possible.
2256 
2257  LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n");
2258 
2259  ++NumShiftUntilBitTest;
2260  return MadeChange;
2261 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:26
DisableLIRPAll
static cl::opt< bool, true > DisableLIRPAll("disable-" DEBUG_TYPE "-all", cl::desc("Options to disable Loop Idiom Recognize Pass."), cl::location(DisableLIRP::All), cl::init(false), cl::ReallyHidden)
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:586
llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:487
llvm::ScalarEvolution::hasLoopInvariantBackedgeTakenCount
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
Definition: ScalarEvolution.cpp:12209
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:63
llvm::MemIntrinsicBase::getDestAlignment
unsigned getDestAlignment() const
FIXME: Remove this function once transition to Align is over.
Definition: IntrinsicInst.h:612
llvm::ScalarEvolution::isLoopEntryGuardedByCond
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS.
Definition: ScalarEvolution.cpp:10200
llvm::ScalarEvolution::getNegativeSCEV
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Definition: ScalarEvolution.cpp:3932
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:743
llvm::isKnownNonNegative
bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Returns true if the give value is known to be non-negative.
Definition: ValueTracking.cpp:315
match_LoopInvariant
Match loop-invariant value.
Definition: LoopIdiomRecognize.cpp:1918
llvm::LoopBase::getExitBlock
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:82
llvm::WeakTrackingVH
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
match_LoopInvariant::L
const Loop * L
Definition: LoopIdiomRecognize.cpp:1920
CmpInstAnalysis.h
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::LocationSize::afterPointer
constexpr static LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
Definition: MemoryLocation.h:123
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1295
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:722
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:378
llvm::isModOrRefSet
LLVM_NODISCARD bool isModOrRefSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:189
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
ScalarEvolutionExpander.h
Scalar.h
llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition: ScalarEvolutionExpressions.h:362
llvm::TypeSize::getFixedSize
ScalarTy getFixedSize() const
Definition: TypeSize.h:426
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:61
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:456
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
StringRef.h
Pass.h
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63
it
Reference model for inliner Oz decision policy Note this model is also referenced by test Transforms Inline ML tests if replacing it
Definition: README.txt:3
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:317
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
detectPopcountIdiom
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)
Return true iff the idiom is detected in the loop.
Definition: LoopIdiomRecognize.cpp:1310
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
LoopAccessAnalysis.h
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:693
llvm::IRBuilder<>
MapVector.h
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::TargetTransformInfo::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:860
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:744
llvm::Instruction::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
Definition: Instruction.cpp:132
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:823
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2207
llvm::initializeLoopIdiomRecognizeLegacyPassPass
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry &)
ValueTracking.h
Local.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::SetVector::iterator
typename vector_type::const_iterator iterator
Definition: SetVector.h:48
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:150
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::PatternMatch::m_Br
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition: PatternMatch.h:1692
APInt.h
ScalarEvolution.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
DenseMap.h
llvm::ICmpInst::isEquality
bool isEquality() const
Return true if this predicate is either EQ or NE.
Definition: Instructions.h:1273
Module.h
llvm::createLoopIdiomPass
Pass * createLoopIdiomPass()
Definition: LoopIdiomRecognize.cpp:328
llvm::Instruction::isShift
bool isShift() const
Definition: Instruction.h:167
llvm::isBytewiseValue
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
Definition: ValueTracking.cpp:3738
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
match_LoopInvariant::match
bool match(ITy *V)
Definition: LoopIdiomRecognize.cpp:1925
llvm::GlobalValue::UnnamedAddr::Global
@ Global
llvm::isConsecutiveAccess
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
Definition: LoopAccessAnalysis.cpp:1235
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::MapVector< Value *, StoreList >
llvm::SmallPtrSet< Value *, 16 >
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:128
llvm::DisableLIRP::All
static bool All
When true, the entire pass is disabled.
Definition: LoopIdiomRecognize.h:30
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:752
llvm::ConstantExpr::getExactLogBase2
static Constant * getExactLogBase2(Constant *C)
If C is a scalar/fixed width vector of known powers of 2, then this function returns a new scalar/fix...
Definition: Constants.cpp:2757
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:264
llvm::GlobalValue::setUnnamedAddr
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:212
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:359
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:266
llvm::ScalarEvolution::getTruncateOrZeroExtend
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition: ScalarEvolution.cpp:4025
llvm::PatternMatch::m_c_And
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
Definition: PatternMatch.h:2185
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
llvm::LoadInst::getAlign
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:222
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::SimpleLoopSafetyInfo
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
Definition: MustExecute.h:110
llvm::codeview::PointerMode::Pointer
@ Pointer
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:141
INITIALIZE_PASS_END
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Definition: RegBankSelect.cpp:69
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:299
llvm::IRBuilderBase::getInt1
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:453
MustExecute.h
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:185
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::MemSetBase::getValue
Value * getValue() const
Definition: IntrinsicInst.h:718
llvm::LoopBase::block_end
block_iterator block_end() const
Definition: LoopInfo.h:177
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
llvm::BTF::HeaderSize
@ HeaderSize
Definition: BTF.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2816
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
Instruction.h
llvm::isSafeToExpand
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
Definition: ScalarEvolutionExpander.cpp:2678
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1251
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::StoreInst::getValueOperand
Value * getValueOperand()
Definition: Instructions.h:398
GlobalValue.h
llvm::InlinerFunctionImportStatsOpts::No
@ No
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:961
llvm::SetVector::begin
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::ScalarEvolution::getOne
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
Definition: ScalarEvolution.h:596
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:456
LoopIdiomRecognize.h
llvm::TargetTransformInfo::getPopcntSupport
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
Definition: TargetTransformInfo.cpp:522
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::LocationSize
Definition: MemoryLocation.h:65
llvm::User
Definition: User.h:44
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1396
SI
@ SI
Definition: SIInstrInfo.cpp:7342
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::StoreInst::getAlignment
unsigned getAlignment() const
Return the alignment of the access that is being performed FIXME: Remove this function once transitio...
Definition: Instructions.h:351
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:142
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:119
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2755
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::Constant::getAllOnesValue
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:405
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:395
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:154
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1770
llvm::LocationSize::precise
static LocationSize precise(uint64_t Value)
Definition: MemoryLocation.h:100
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:885
LoopUtils.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopIdiomRecognize.cpp:108
llvm::getUnderlyingObject
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
Definition: ValueTracking.cpp:4272
DebugLoc.h
llvm::LPPassManager
Definition: LoopPass.h:75
SmallPtrSet.h
m_LoopInvariant
match_LoopInvariant< Ty > m_LoopInvariant(const Ty &M, const Loop *L)
Matches if the value is loop-invariant.
Definition: LoopIdiomRecognize.cpp:1932
llvm::BasicBlock::getModule
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:144
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
PatternMatch.h
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:212
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
getStartForNegStride
static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, unsigned StoreSize, ScalarEvolution *SE)
Definition: LoopIdiomRecognize.cpp:883
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:863
llvm::None
const NoneType None
Definition: None.h:23
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:96
Type.h
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:116
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::MemSetInst
This class wraps the llvm.memset intrinsic.
Definition: IntrinsicInst.h:857
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:469
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3086
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:3885
llvm::LoopBase::block_begin
block_iterator block_begin() const
Definition: LoopInfo.h:176
llvm::PatternMatch::m_Power2
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
Definition: PatternMatch.h:500
getStoreStride
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)
Definition: LoopIdiomRecognize.cpp:408
UseLIRCodeSizeHeurs
static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling" "with -Os/-Oz"), cl::init(true), cl::Hidden)
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1419
llvm::LoopBase< BasicBlock, Loop >::block_iterator
ArrayRef< BasicBlock * >::const_iterator block_iterator
Definition: LoopInfo.h:175
llvm::LoadInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:272
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:491
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:303
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1178
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:446
llvm::LoopPass
Definition: LoopPass.h:27
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2321
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:243
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:407
llvm::LoopBase::getUniqueExitBlocks
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:122
createPopcntIntrinsic
static CallInst * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)
Definition: LoopIdiomRecognize.cpp:1662
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2720
mayLoopAccessLocation
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &IgnoredStores)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
Definition: LoopIdiomRecognize.cpp:848
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
MemoryLocation.h
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:704
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:964
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::PatternMatch::m_And
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1074
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:402
ArrayRef.h
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:362
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
llvm::ModRefInfo
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition: AliasAnalysis.h:148
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Value::replaceUsesOutsideBlock
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
Definition: Value.cpp:536
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
match_LoopInvariant::SubPattern
SubPattern_t SubPattern
Definition: LoopIdiomRecognize.cpp:1919
detectShiftUntilZeroIdiom
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)
Return true if the idiom is detected in the loop.
Definition: LoopIdiomRecognize.cpp:1445
llvm::TargetLibraryInfo::has
bool has(LibFunc F) const
Tests whether a library function is available.
Definition: TargetLibraryInfo.h:311
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::SCEVExpanderCleaner
Helper to remove instructions inserted during SCEV expansion, unless they are marked as used.
Definition: ScalarEvolutionExpander.h:497
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:605
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:922
llvm::SimpleLoopSafetyInfo::computeLoopSafetyInfo
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:54
getNumBytes
static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, unsigned StoreSize, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute the number of bytes as a SCEV from the backedge taken count.
Definition: LoopIdiomRecognize.cpp:897
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:649
InstructionCost.h
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:71
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
DisableLIRPMemcpy
static cl::opt< bool, true > DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memcpy."), cl::location(DisableLIRP::Memcpy), cl::init(false), cl::ReallyHidden)
getDebugLoc
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
Definition: MachineInstrBundle.cpp:109
llvm::PatternMatch::m_CombineAnd
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
Definition: PatternMatch.h:172
llvm::MemIntrinsicBase::getDest
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
Definition: IntrinsicInst.h:604
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::BasicBlock::instructionsWithoutDebug
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=false) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:100
llvm::BinaryOperator
Definition: InstrTypes.h:190
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
DataLayout.h
llvm::PatternMatch::m_SpecificInt_ICMP
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
Definition: PatternMatch.h:582
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:444
LoopPass.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:192
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:168
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:759
uint32_t
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:940
llvm::ModRefInfo::Mod
@ Mod
The access may modify the value stored in memory.
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::SCEV::FlagNUW
@ FlagNUW
Definition: ScalarEvolution.h:118
llvm::ConstantInt::isMinusOne
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:204
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
llvm::MemoryAccess
Definition: MemorySSA.h:140
idiom
loop idiom
Definition: LoopIdiomRecognize.cpp:325
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:298
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
ValueHandle.h
matchCondition
static Value * matchCondition(BranchInst *BI, BasicBlock *LoopEntry, bool JmpOnZero=false)
Check if the given conditional branch is based on the comparison between a variable and zero,...
Definition: LoopIdiomRecognize.cpp:1249
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::SimpleLoopSafetyInfo::anyBlockMayThrow
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:50
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:706
detectShiftUntilBitTestIdiom
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, Value *&BitMask, Value *&BitPos, Value *&CurrX, Instruction *&NextX)
Return true if the idiom is detected in the loop.
Definition: LoopIdiomRecognize.cpp:1958
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
llvm::LoopIdiomRecognizePass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopIdiomRecognize.cpp:296
llvm::DisableLIRP::Memcpy
static bool Memcpy
When true, Memcpy is disabled.
Definition: LoopIdiomRecognize.h:36
Attributes.h
j
return j(j<< 16)
llvm::Instruction::isAtomic
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
Definition: Instruction.cpp:583
Constant.h
llvm::ScalarEvolution::forgetLoop
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
Definition: ScalarEvolution.cpp:7173
llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition: ScalarEvolutionExpressions.h:363
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::decomposeBitTestICmp
bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, Value *&X, APInt &Mask, bool LookThroughTrunc=true)
Decompose an icmp into the form ((X & Mask) pred 0) if possible.
Definition: CmpInstAnalysis.cpp:66
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
Definition: ScalarEvolution.cpp:3986
llvm::MemIntrinsicBase::getLength
Value * getLength() const
Definition: IntrinsicInst.h:595
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
getMemSetPatternValue
static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)
getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset_patte...
Definition: LoopIdiomRecognize.cpp:419
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2612
llvm::ConstantInt::getBool
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:847
GlobalVariable.h
llvm::IRBuilderBase::GetInsertBlock
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:178
llvm::MemorySSA::BeforeTerminator
@ BeforeTerminator
Definition: MemorySSA.h:791
llvm::TypeSize
Definition: TypeSize.h:417
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Casting.h
idioms
loop Recognize loop idioms
Definition: LoopIdiomRecognize.cpp:326
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
PassManager.h
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:207
llvm::ConstantArray::get
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1248
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::PrevailingType::Yes
@ Yes
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:250
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:184
llvm::CmpInst::setPredicate
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:802
isSimple
static bool isSimple(Instruction *I)
Definition: SLPVectorizer.cpp:524
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
llvm::SCEVConstant::getAPInt
const APInt & getAPInt() const
Definition: ScalarEvolutionExpressions.h:57
ScalarEvolutionExpressions.h
llvm::SetVector::end
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:684
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
MemorySSA.h
llvm::ScalarEvolution::getZeroExtendExpr
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1553
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
Instructions.h
llvm::LoadInst::isVolatile
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:211
llvm::MemIntrinsic::isVolatile
bool isVolatile() const
Definition: IntrinsicInst.h:835
SmallVector.h
match_LoopInvariant::match_LoopInvariant
match_LoopInvariant(const SubPattern_t &SP, const Loop *L)
Definition: LoopIdiomRecognize.cpp:1922
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:758
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:365
User.h
llvm::StoreInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:401
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1355
llvm::Instruction::hasNoSignedWrap
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
Definition: Instruction.cpp:136
llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:745
Dominators.h
llvm::inferLibFuncAttributes
bool inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI)
Analyze the name and prototype of the given function and set any applicable attributes.
Definition: BuildLibCalls.cpp:219
llvm::BasicBlock::size
size_t size() const
Definition: BasicBlock.h:306
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:799
TargetTransformInfo.h
llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition: LoopAnalysisManager.h:61
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:164
llvm::PHINode
Definition: Instructions.h:2572
llvm::SmallVectorImpl< BasicBlock * >
llvm::TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize
unsigned getAtomicMemIntrinsicMaxElementSize() const
Definition: TargetTransformInfo.cpp:929
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom", "Recognize loop idioms", false, false) INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:397
DerivedTypes.h
llvm::SmallPtrSetImpl< Instruction * >
llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:379
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:43
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2300
createFFSIntrinsic
static CallInst * createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck, Intrinsic::ID IID)
Definition: LoopIdiomRecognize.cpp:1675
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1450
llvm::ConstantInt::isOne
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:198
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::LoadInst::isUnordered
bool isUnordered() const
Definition: Instructions.h:260
DisableLIRPMemset
static cl::opt< bool, true > DisableLIRPMemset("disable-" DEBUG_TYPE "-memset", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memset."), cl::location(DisableLIRP::Memset), cl::init(false), cl::ReallyHidden)
deleteDeadInstruction
static void deleteDeadInstruction(Instruction *I)
Definition: LoopIdiomRecognize.cpp:330
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
LLVMContext.h
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:376
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::PatternMatch::m_BasicBlock
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:116
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:706
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:411
llvm::GlobalObject::setAlignment
void setAlignment(MaybeAlign Align)
Definition: Globals.cpp:117
llvm::SCEVNAryExpr::getOperand
const SCEV * getOperand(unsigned i) const
Definition: ScalarEvolutionExpressions.h:199
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3005
raw_ostream.h
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::ScalarEvolution::getBackedgeTakenCount
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition: ScalarEvolution.cpp:7005
llvm::intersectModRef
LLVM_NODISCARD ModRefInfo intersectModRef(const ModRefInfo MRI1, const ModRefInfo MRI2)
Definition: AliasAnalysis.h:236
Value.h
getRecurrenceVar
static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)
Definition: LoopIdiomRecognize.cpp:1277
llvm::LoopStandardAnalysisResults::TLI
TargetLibraryInfo & TLI
Definition: LoopAnalysisManager.h:58
InitializePasses.h
BuildLibCalls.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::PatternMatch::m_Shl
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1092
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:434
llvm::AAResults::getModRefInfo
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
Definition: AliasAnalysis.cpp:217
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3084
llvm::ModRefInfo::ModRef
@ ModRef
The access may reference and may modify the value stored in memory.
llvm::IRBuilderBase::CreateCall
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2344
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3098
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:209
llvm::DisableLIRP::Memset
static bool Memset
When true, Memset is disabled.
Definition: LoopIdiomRecognize.h:33
SetVector.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38