104#define DEBUG_TYPE "loop-idiom"
106STATISTIC(NumMemSet,
"Number of memset's formed from loop stores");
107STATISTIC(NumMemCpy,
"Number of memcpy's formed from loop load+stores");
108STATISTIC(NumMemMove,
"Number of memmove's formed from loop load+stores");
110 NumShiftUntilBitTest,
111 "Number of uncountable loops recognized as 'shift until bitttest' idiom");
113 "Number of uncountable loops recognized as 'shift until zero' idiom");
118 cl::desc(
"Options to disable Loop Idiom Recognize Pass."),
125 cl::desc(
"Proceed with loop idiom recognize pass, but do "
126 "not convert loop(s) to memset."),
133 cl::desc(
"Proceed with loop idiom recognize pass, but do "
134 "not convert loop(s) to memcpy."),
139 "use-lir-code-size-heurs",
140 cl::desc(
"Use loop idiom recognition code size heuristics when compiling"
146class LoopIdiomRecognize {
147 Loop *CurLoop =
nullptr;
156 bool ApplyCodeSizeHeuristics;
157 std::unique_ptr<MemorySSAUpdater> MSSAU;
166 : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI),
TTI(
TTI),
DL(
DL), ORE(ORE) {
168 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
171 bool runOnLoop(
Loop *L);
177 StoreListMap StoreRefsForMemset;
178 StoreListMap StoreRefsForMemsetPattern;
179 StoreList StoreRefsForMemcpy;
181 bool HasMemsetPattern;
185 enum LegalStoreKind {
190 UnorderedAtomicMemcpy,
198 bool runOnCountableLoop();
203 LegalStoreKind isLegalStore(
StoreInst *SI);
204 enum class ForMemset {
No,
Yes };
208 template <
typename MemInst>
209 bool processLoopMemIntrinsic(
211 bool (LoopIdiomRecognize::*Processor)(MemInst *,
const SCEV *),
212 const SCEV *BECount);
216 bool processLoopStridedStore(
Value *DestPtr,
const SCEV *StoreSizeSCEV,
221 bool IsNegStride,
bool IsLoopMemset =
false);
222 bool processLoopStoreOfLoopLoad(
StoreInst *SI,
const SCEV *BECount);
223 bool processLoopStoreOfLoopLoad(
Value *DestPtr,
Value *SourcePtr,
229 const SCEV *BECount);
230 bool avoidLIRForMultiBlockLoop(
bool IsMemset =
false,
231 bool IsLoopMemset =
false);
237 bool runOnNoncountableLoop();
239 bool recognizePopcount();
242 bool recognizeAndInsertFFS();
247 bool IsCntPhiUsedOutsideLoop);
249 bool recognizeShiftUntilBitTest();
250 bool recognizeShiftUntilZero();
262 const auto *
DL = &L.getHeader()->getModule()->getDataLayout();
269 LoopIdiomRecognize LIR(&AR.
AA, &AR.
DT, &AR.
LI, &AR.
SE, &AR.
TLI, &AR.
TTI,
271 if (!LIR.runOnLoop(&L))
282 I->eraseFromParent();
291bool LoopIdiomRecognize::runOnLoop(
Loop *L) {
295 if (!
L->getLoopPreheader())
300 if (
Name ==
"memset" ||
Name ==
"memcpy")
304 ApplyCodeSizeHeuristics =
307 HasMemset = TLI->
has(LibFunc_memset);
308 HasMemsetPattern = TLI->
has(LibFunc_memset_pattern16);
309 HasMemcpy = TLI->
has(LibFunc_memcpy);
311 if (HasMemset || HasMemsetPattern || HasMemcpy)
313 return runOnCountableLoop();
315 return runOnNoncountableLoop();
318bool LoopIdiomRecognize::runOnCountableLoop() {
320 assert(!isa<SCEVCouldNotCompute>(BECount) &&
321 "runOnCountableLoop() called on a loop without a predictable"
322 "backedge-taken count");
326 if (
const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
327 if (BECst->getAPInt() == 0)
345 bool MadeChange =
false;
353 MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
377 if (!
C || isa<ConstantExpr>(
C))
386 if (
DL->isBigEndian())
402 unsigned ArraySize = 16 /
Size;
407LoopIdiomRecognize::LegalStoreKind
408LoopIdiomRecognize::isLegalStore(
StoreInst *SI) {
410 if (
SI->isVolatile())
411 return LegalStoreKind::None;
413 if (!
SI->isUnordered())
414 return LegalStoreKind::None;
417 if (
SI->getMetadata(LLVMContext::MD_nontemporal))
418 return LegalStoreKind::None;
420 Value *StoredVal =
SI->getValueOperand();
421 Value *StorePtr =
SI->getPointerOperand();
426 return LegalStoreKind::None;
434 return LegalStoreKind::None;
440 dyn_cast<SCEVAddRecExpr>(SE->
getSCEV(StorePtr));
442 return LegalStoreKind::None;
445 if (!isa<SCEVConstant>(StoreEv->
getOperand(1)))
446 return LegalStoreKind::None;
457 bool UnorderedAtomic =
SI->isUnordered() && !
SI->isSimple();
466 return LegalStoreKind::Memset;
473 return LegalStoreKind::MemsetPattern;
481 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
482 if (StoreSize != Stride && StoreSize != -Stride)
483 return LegalStoreKind::None;
486 LoadInst *LI = dyn_cast<LoadInst>(
SI->getValueOperand());
490 return LegalStoreKind::None;
493 return LegalStoreKind::None;
501 return LegalStoreKind::None;
505 return LegalStoreKind::None;
508 UnorderedAtomic = UnorderedAtomic || LI->
isAtomic();
509 return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
510 : LegalStoreKind::Memcpy;
513 return LegalStoreKind::None;
516void LoopIdiomRecognize::collectStores(
BasicBlock *BB) {
517 StoreRefsForMemset.clear();
518 StoreRefsForMemsetPattern.clear();
519 StoreRefsForMemcpy.clear();
526 switch (isLegalStore(SI)) {
527 case LegalStoreKind::None:
530 case LegalStoreKind::Memset: {
533 StoreRefsForMemset[
Ptr].push_back(SI);
535 case LegalStoreKind::MemsetPattern: {
538 StoreRefsForMemsetPattern[
Ptr].push_back(SI);
540 case LegalStoreKind::Memcpy:
541 case LegalStoreKind::UnorderedAtomicMemcpy:
542 StoreRefsForMemcpy.push_back(SI);
545 assert(
false &&
"unhandled return value");
554bool LoopIdiomRecognize::runOnLoopBlock(
564 bool MadeChange =
false;
571 for (
auto &SL : StoreRefsForMemset)
572 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
574 for (
auto &SL : StoreRefsForMemsetPattern)
575 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
578 for (
auto &SI : StoreRefsForMemcpy)
579 MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
581 MadeChange |= processLoopMemIntrinsic<MemCpyInst>(
582 BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
583 MadeChange |= processLoopMemIntrinsic<MemSetInst>(
584 BB, &LoopIdiomRecognize::processLoopMemSet, BECount);
591 const SCEV *BECount, ForMemset For) {
599 for (
unsigned i = 0, e = SL.
size(); i < e; ++i) {
600 assert(SL[i]->
isSimple() &&
"Expected only non-volatile stores.");
602 Value *FirstStoredVal = SL[i]->getValueOperand();
603 Value *FirstStorePtr = SL[i]->getPointerOperand();
605 cast<SCEVAddRecExpr>(SE->
getSCEV(FirstStorePtr));
607 unsigned FirstStoreSize =
DL->getTypeStoreSize(SL[i]->getValueOperand()->
getType());
610 if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
615 Value *FirstSplatValue =
nullptr;
616 Constant *FirstPatternValue =
nullptr;
618 if (For == ForMemset::Yes)
623 assert((FirstSplatValue || FirstPatternValue) &&
624 "Expected either splat value or pattern value.");
632 for (j = i + 1;
j <
e; ++
j)
634 for (j = i;
j > 0; --
j)
637 for (
auto &k : IndexQueue) {
638 assert(SL[k]->
isSimple() &&
"Expected only non-volatile stores.");
639 Value *SecondStorePtr = SL[
k]->getPointerOperand();
641 cast<SCEVAddRecExpr>(SE->
getSCEV(SecondStorePtr));
644 if (FirstStride != SecondStride)
647 Value *SecondStoredVal = SL[
k]->getValueOperand();
648 Value *SecondSplatValue =
nullptr;
649 Constant *SecondPatternValue =
nullptr;
651 if (For == ForMemset::Yes)
656 assert((SecondSplatValue || SecondPatternValue) &&
657 "Expected either splat value or pattern value.");
660 if (For == ForMemset::Yes) {
661 if (isa<UndefValue>(FirstSplatValue))
662 FirstSplatValue = SecondSplatValue;
663 if (FirstSplatValue != SecondSplatValue)
666 if (isa<UndefValue>(FirstPatternValue))
667 FirstPatternValue = SecondPatternValue;
668 if (FirstPatternValue != SecondPatternValue)
673 ConsecutiveChain[SL[i]] = SL[
k];
682 bool Changed =
false;
693 unsigned StoreSize = 0;
696 while (Tails.
count(
I) || Heads.count(
I)) {
697 if (TransformedStores.
count(
I))
701 StoreSize +=
DL->getTypeStoreSize(
I->getValueOperand()->getType());
703 I = ConsecutiveChain[
I];
713 if (StoreSize != Stride && StoreSize != -Stride)
716 bool IsNegStride = StoreSize == -Stride;
720 if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
722 HeadStore, AdjacentStores, StoreEv, BECount,
724 TransformedStores.
insert(AdjacentStores.
begin(), AdjacentStores.
end());
734template <
typename MemInst>
735bool LoopIdiomRecognize::processLoopMemIntrinsic(
737 bool (LoopIdiomRecognize::*Processor)(MemInst *,
const SCEV *),
738 const SCEV *BECount) {
739 bool MadeChange =
false;
743 if (MemInst *
MI = dyn_cast<MemInst>(Inst)) {
745 if (!(this->*Processor)(
MI, BECount))
759bool LoopIdiomRecognize::processLoopMemCpy(
MemCpyInst *MCI,
760 const SCEV *BECount) {
771 if (!Dest || !Source)
786 if ((SizeInBytes >> 32) != 0)
792 dyn_cast<SCEVConstant>(StoreEv->
getOperand(1));
794 dyn_cast<SCEVConstant>(LoadEv->
getOperand(1));
795 if (!ConstStoreStride || !ConstLoadStride)
804 if (SizeInBytes != StoreStrideValue && SizeInBytes != -StoreStrideValue) {
807 <<
ore::NV(
"Inst",
"memcpy") <<
" in "
809 <<
" function will not be hoisted: "
810 <<
ore::NV(
"Reason",
"memcpy size is not equal to stride");
815 int64_t StoreStrideInt = StoreStrideValue.
getSExtValue();
818 if (StoreStrideInt != LoadStrideInt)
821 return processLoopStoreOfLoopLoad(
828bool LoopIdiomRecognize::processLoopMemSet(
MemSetInst *MSI,
829 const SCEV *BECount) {
844 if (!Ev || Ev->
getLoop() != CurLoop)
853 if (!PointerStrideSCEV || !MemsetSizeSCEV)
856 bool IsNegStride =
false;
857 const bool IsConstantSize = isa<ConstantInt>(MSI->
getLength());
859 if (IsConstantSize) {
870 if (SizeInBytes != Stride && SizeInBytes != -Stride)
873 IsNegStride = SizeInBytes == -Stride;
881 if (
Pointer->getType()->getPointerAddressSpace() != 0) {
894 const SCEV *PositiveStrideSCEV =
897 LLVM_DEBUG(
dbgs() <<
" MemsetSizeSCEV: " << *MemsetSizeSCEV <<
"\n"
898 <<
" PositiveStrideSCEV: " << *PositiveStrideSCEV
901 if (PositiveStrideSCEV != MemsetSizeSCEV) {
904 const SCEV *FoldedPositiveStride =
906 const SCEV *FoldedMemsetSize =
910 <<
" FoldedMemsetSize: " << *FoldedMemsetSize <<
"\n"
911 <<
" FoldedPositiveStride: " << *FoldedPositiveStride
914 if (FoldedPositiveStride != FoldedMemsetSize) {
931 BECount, IsNegStride,
true);
939 const SCEV *BECount,
const SCEV *StoreSizeSCEV,
949 const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);
950 const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
951 if (BECst && ConstSize)
973 Type *IntPtr,
const SCEV *StoreSizeSCEV,
976 if (!StoreSizeSCEV->
isOne()) {
991 const SCEV *StoreSizeSCEV,
Loop *CurLoop,
993 const SCEV *TripCountSCEV =
1002bool LoopIdiomRecognize::processLoopStridedStore(
1006 const SCEV *BECount,
bool IsNegStride,
bool IsLoopMemset) {
1014 assert((SplatValue || PatternValue) &&
1015 "Expected either splat value or pattern value.");
1026 Type *DestInt8PtrTy =
Builder.getInt8PtrTy(DestAS);
1029 bool Changed =
false;
1037 if (!Expander.isSafeToExpand(Start))
1046 Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->
getTerminator());
1058 StoreSizeSCEV, *AA, Stores))
1061 if (avoidLIRForMultiBlockLoop(
true, IsLoopMemset))
1066 const SCEV *NumBytesS =
1067 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop,
DL, SE);
1071 if (!Expander.isSafeToExpand(NumBytesS))
1075 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->
getTerminator());
1082 AATags = AATags.
merge(
Store->getAAMetadata());
1083 if (
auto CI = dyn_cast<ConstantInt>(NumBytes))
1084 AATags = AATags.
extendTo(CI->getZExtValue());
1090 NewCall =
Builder.CreateMemSet(
1091 BasePtr, SplatValue, NumBytes,
MaybeAlign(StoreAlignment),
1096 Type *Int8PtrTy = DestInt8PtrTy;
1098 StringRef FuncName =
"memset_pattern16";
1100 Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy);
1107 PatternValue,
".memset_pattern");
1111 NewCall =
Builder.CreateCall(MSP, {
BasePtr, PatternPtr, NumBytes});
1127 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1129 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc),
true);
1133 <<
" from store to: " << *Ev <<
" at: " << *TheStore
1139 R <<
"Transformed loop-strided store in "
1141 <<
" function into a call to "
1144 if (!Stores.empty())
1146 for (
auto *
I : Stores) {
1147 R <<
ore::NV(
"FromBlock",
I->getParent()->getName())
1155 for (
auto *
I : Stores) {
1157 MSSAU->removeMemoryAccess(
I,
true);
1161 MSSAU->getMemorySSA()->verifyMemorySSA();
1163 ExpCleaner.markResultUsed();
1170bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
StoreInst *SI,
1171 const SCEV *BECount) {
1172 assert(
SI->isUnordered() &&
"Expected only non-volatile non-ordered stores.");
1174 Value *StorePtr =
SI->getPointerOperand();
1176 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
1179 LoadInst *LI = cast<LoadInst>(
SI->getValueOperand());
1189 return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,
1191 StoreEv, LoadEv, BECount);
1195class MemmoveVerifier {
1197 explicit MemmoveVerifier(
const Value &LoadBasePtr,
const Value &StoreBasePtr,
1200 LoadBasePtr.stripPointerCasts(), LoadOff,
DL)),
1202 StoreBasePtr.stripPointerCasts(), StoreOff,
DL)),
1203 IsSameObject(BP1 == BP2) {}
1205 bool loadAndStoreMayFormMemmove(
unsigned StoreSize,
bool IsNegStride,
1207 bool IsMemCpy)
const {
1211 if ((!IsNegStride && LoadOff <= StoreOff) ||
1212 (IsNegStride && LoadOff >= StoreOff))
1218 DL.getTypeSizeInBits(TheLoad.
getType()).getFixedValue() / 8;
1219 if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
1221 if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
1222 (IsNegStride && LoadOff + LoadSize > StoreOff))
1230 int64_t LoadOff = 0;
1231 int64_t StoreOff = 0;
1236 const bool IsSameObject;
1240bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
1249 if (isa<MemCpyInlineInst>(TheStore))
1261 bool Changed =
false;
1264 Type *IntIdxTy =
Builder.getIntNTy(
DL->getIndexSizeInBits(StrAS));
1267 const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
1270 assert(ConstStoreSize &&
"store size is expected to be a constant");
1273 bool IsNegStride = StoreSize == -Stride;
1286 Value *StoreBasePtr = Expander.expandCodeFor(
1299 IgnoredInsts.
insert(TheStore);
1301 bool IsMemCpy = isa<MemCpyInst>(TheStore);
1302 const StringRef InstRemark = IsMemCpy ?
"memcpy" :
"load and store";
1304 bool LoopAccessStore =
1306 StoreSizeSCEV, *AA, IgnoredInsts);
1307 if (LoopAccessStore) {
1313 IgnoredInsts.
insert(TheLoad);
1315 BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
1319 <<
ore::NV(
"Inst", InstRemark) <<
" in "
1321 <<
" function will not be hoisted: "
1322 <<
ore::NV(
"Reason",
"The loop may access store location");
1326 IgnoredInsts.
erase(TheLoad);
1339 Value *LoadBasePtr = Expander.expandCodeFor(
1344 MemmoveVerifier
Verifier(*LoadBasePtr, *StoreBasePtr, *
DL);
1345 if (IsMemCpy && !
Verifier.IsSameObject)
1346 IgnoredInsts.
erase(TheStore);
1348 StoreSizeSCEV, *AA, IgnoredInsts)) {
1351 <<
ore::NV(
"Inst", InstRemark) <<
" in "
1353 <<
" function will not be hoisted: "
1354 <<
ore::NV(
"Reason",
"The loop may access load location");
1359 bool UseMemMove = IsMemCpy ?
Verifier.IsSameObject : LoopAccessStore;
1361 if (!
Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
1365 if (avoidLIRForMultiBlockLoop())
1370 const SCEV *NumBytesS =
1371 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop,
DL, SE);
1374 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->
getTerminator());
1378 AATags = AATags.
merge(StoreAATags);
1379 if (
auto CI = dyn_cast<ConstantInt>(NumBytes))
1380 AATags = AATags.
extendTo(CI->getZExtValue());
1390 NewCall =
Builder.CreateMemMove(
1391 StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
1395 Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,
1396 NumBytes,
false, AATags.
TBAA,
1404 assert((StoreAlign && LoadAlign) &&
1405 "Expect unordered load/store to have align.");
1406 if (*StoreAlign < StoreSize || *LoadAlign < StoreSize)
1419 NewCall =
Builder.CreateElementUnorderedAtomicMemCpy(
1420 StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes, StoreSize,
1426 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1428 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc),
true);
1432 <<
" from load ptr=" << *LoadEv <<
" at: " << *TheLoad
1434 <<
" from store ptr=" << *StoreEv <<
" at: " << *TheStore
1440 <<
"Formed a call to "
1442 <<
"() intrinsic from " <<
ore::NV(
"Inst", InstRemark)
1453 MSSAU->removeMemoryAccess(TheStore,
true);
1456 MSSAU->getMemorySSA()->verifyMemorySSA();
1461 ExpCleaner.markResultUsed();
1468bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(
bool IsMemset,
1469 bool IsLoopMemset) {
1470 if (ApplyCodeSizeHeuristics && CurLoop->
getNumBlocks() > 1) {
1471 if (CurLoop->
isOutermost() && (!IsMemset || !IsLoopMemset)) {
1473 <<
" : LIR " << (IsMemset ?
"Memset" :
"Memcpy")
1474 <<
" avoided: multi-block top-level loop\n");
1482bool LoopIdiomRecognize::runOnNoncountableLoop() {
1485 <<
"] Noncountable Loop %"
1488 return recognizePopcount() || recognizeAndInsertFFS() ||
1489 recognizeShiftUntilBitTest() || recognizeShiftUntilZero();
1499 bool JmpOnZero =
false) {
1508 if (!CmpZero || !CmpZero->
isZero())
1519 return Cond->getOperand(0);
1528 auto *PhiX = dyn_cast<PHINode>(VarX);
1529 if (PhiX && PhiX->getParent() == LoopEntry &&
1530 (PhiX->getOperand(0) == DefX || PhiX->
getOperand(1) == DefX))
1566 Value *VarX1, *VarX0;
1569 DefX2 = CountInst =
nullptr;
1570 VarX1 = VarX0 =
nullptr;
1571 PhiX = CountPhi =
nullptr;
1577 dyn_cast<BranchInst>(LoopEntry->
getTerminator()), LoopEntry))
1578 DefX2 = dyn_cast<Instruction>(
T);
1585 if (!DefX2 || DefX2->
getOpcode() != Instruction::And)
1590 if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->
getOperand(0))))
1594 SubOneOp = dyn_cast<BinaryOperator>(DefX2->
getOperand(1));
1596 if (!SubOneOp || SubOneOp->
getOperand(0) != VarX1)
1602 (SubOneOp->
getOpcode() == Instruction::Add &&
1615 CountInst =
nullptr;
1618 if (Inst.
getOpcode() != Instruction::Add)
1622 if (!Inc || !Inc->
isOne())
1630 bool LiveOutLoop =
false;
1632 if ((cast<Instruction>(U))->
getParent() != LoopEntry) {
1652 auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->
getTerminator());
1657 CntInst = CountInst;
1697 Value *VarX =
nullptr;
1706 dyn_cast<BranchInst>(LoopEntry->
getTerminator()), LoopEntry))
1707 DefX = dyn_cast<Instruction>(
T);
1712 if (!DefX || !DefX->
isShift())
1714 IntrinID = DefX->
getOpcode() == Instruction::Shl ? Intrinsic::cttz :
1717 if (!Shft || !Shft->
isOne())
1742 if (Inst.
getOpcode() != Instruction::Add)
1766bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1778 size_t IdiomCanonicalSize = 6;
1781 CntInst, CntPhi, DefX))
1784 bool IsCntPhiUsedOutsideLoop =
false;
1786 if (!CurLoop->
contains(cast<Instruction>(U))) {
1787 IsCntPhiUsedOutsideLoop =
true;
1790 bool IsCntInstUsedOutsideLoop =
false;
1792 if (!CurLoop->
contains(cast<Instruction>(U))) {
1793 IsCntInstUsedOutsideLoop =
true;
1798 if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1804 bool ZeroCheck =
false;
1813 if (!IsCntPhiUsedOutsideLoop) {
1817 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1842 std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1847 if (HeaderSize != IdiomCanonicalSize &&
1851 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1853 IsCntPhiUsedOutsideLoop);
1861bool LoopIdiomRecognize::recognizePopcount() {
1875 if (LoopBody->
size() >= 20) {
1885 if (!EntryBI || EntryBI->isConditional())
1893 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1894 if (!PreCondBI || PreCondBI->isUnconditional())
1903 transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
1909 Value *Ops[] = {Val};
1965void LoopIdiomRecognize::transformLoopToCountable(
1968 bool ZeroCheck,
bool IsCntPhiUsedOutsideLoop) {
1982 if (IsCntPhiUsedOutsideLoop) {
1983 if (DefX->
getOpcode() == Instruction::AShr)
1984 InitXNext =
Builder.CreateAShr(InitX, 1);
1985 else if (DefX->
getOpcode() == Instruction::LShr)
1986 InitXNext =
Builder.CreateLShr(InitX, 1);
1987 else if (DefX->
getOpcode() == Instruction::Shl)
1988 InitXNext =
Builder.CreateShl(InitX, 1);
1998 Value *NewCount = Count;
1999 if (IsCntPhiUsedOutsideLoop)
2002 NewCount =
Builder.CreateZExtOrTrunc(NewCount, CntInst->
getType());
2005 if (cast<ConstantInt>(CntInst->
getOperand(1))->isOne()) {
2008 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2009 if (!InitConst || !InitConst->
isZero())
2010 NewCount =
Builder.CreateAdd(NewCount, CntInitVal);
2014 NewCount =
Builder.CreateSub(CntInitVal, NewCount);
2027 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2032 Builder.SetInsertPoint(LbCond);
2047 if (IsCntPhiUsedOutsideLoop)
2057void LoopIdiomRecognize::transformLoopToPopcount(
BasicBlock *PreCondBB,
2061 auto *PreCondBr = cast<BranchInst>(PreCondBB->
getTerminator());
2070 Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
2073 NewCount = PopCntZext =
2074 Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->
getType()));
2076 if (NewCount != PopCnt)
2077 (cast<Instruction>(NewCount))->setDebugLoc(
DL);
2084 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2085 if (!InitConst || !InitConst->
isZero()) {
2086 NewCount =
Builder.CreateAdd(NewCount, CntInitVal);
2087 (cast<Instruction>(NewCount))->setDebugLoc(
DL);
2096 ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
2098 Value *Opnd0 = PopCntZext;
2103 ICmpInst *NewPreCond = cast<ICmpInst>(
2105 PreCondBr->setCondition(NewPreCond);
2133 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2139 Builder.SetInsertPoint(LbCond);
2142 "tcdec",
false,
true));
2170 : SubPattern(SP), L(L) {}
2172 template <
typename ITy>
bool match(ITy *V) {
2173 return L->isLoopInvariant(V) && SubPattern.match(V);
2178template <
typename Ty>
2209 " Performing shift-until-bittest idiom detection.\n");
2219 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2221 using namespace PatternMatch;
2226 Value *CmpLHS, *CmpRHS;
2237 auto MatchVariableBitMask = [&]() {
2246 auto MatchConstantBitMask = [&]() {
2252 auto MatchDecomposableConstantBitMask = [&]() {
2260 if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
2261 !MatchDecomposableConstantBitMask()) {
2267 auto *CurrXPN = dyn_cast<PHINode>(CurrX);
2268 if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
2273 BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
2275 dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
2278 "Expected BaseX to be avaliable in the preheader!");
2289 "Should only get equality predicates here.");
2299 if (TrueBB != LoopHeaderBB) {
2358bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
2359 bool MadeChange =
false;
2361 Value *
X, *BitMask, *BitPos, *XCurr;
2366 " shift-until-bittest idiom detection failed.\n");
2376 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2379 assert(SuccessorBB &&
"There is only a single successor.");
2385 Type *Ty =
X->getType();
2399 " Intrinsic is too costly, not beneficial\n");
2415 if (
auto *BitPosI = dyn_cast<Instruction>(BitPos))
2424 return U.getUser() != BitPosFrozen;
2426 BitPos = BitPosFrozen;
2432 BitPos->
getName() +
".lowbitmask");
2434 Builder.CreateOr(LowBitMask, BitMask, BitPos->
getName() +
".mask");
2435 Value *XMasked =
Builder.CreateAnd(
X, Mask,
X->getName() +
".masked");
2437 IntrID, Ty, {XMasked,
Builder.getTrue()},
2438 nullptr, XMasked->
getName() +
".numleadingzeros");
2441 XMasked->
getName() +
".numactivebits",
true,
2443 Value *XMaskedLeadingOnePos =
2445 XMasked->
getName() +
".leadingonepos",
false,
2449 BitPos, XMaskedLeadingOnePos, CurLoop->
getName() +
".backedgetakencount",
2453 Value *LoopTripCount =
2455 CurLoop->
getName() +
".tripcount",
true,
2464 if (
auto *
I = dyn_cast<Instruction>(NewX))
2465 I->copyIRFlags(XNext,
true);
2477 NewXNext =
Builder.CreateShl(
X, LoopTripCount);
2486 if (
auto *
I = dyn_cast<Instruction>(NewXNext))
2487 I->copyIRFlags(XNext,
true);
2498 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->
begin());
2506 true, Bitwidth != 2);
2509 auto *IVCheck =
Builder.CreateICmpEQ(IVNext, LoopTripCount,
2510 CurLoop->
getName() +
".ivcheck");
2511 Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
2516 IV->addIncoming(IVNext, LoopHeaderBB);
2527 ++NumShiftUntilBitTest;
2563 const SCEV *&ExtraOffsetExpr,
2564 bool &InvertedCond) {
2566 " Performing shift-until-zero idiom detection.\n");
2579 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2581 using namespace PatternMatch;
2590 !
match(ValShiftedIsZero,
2604 IntrinID = ValShifted->
getOpcode() == Instruction::Shl ? Intrinsic::cttz
2613 else if (
match(NBits,
2617 ExtraOffsetExpr = SE->
getSCEV(ExtraOffset);
2624 auto *IVPN = dyn_cast<PHINode>(
IV);
2625 if (!IVPN || IVPN->getParent() != LoopHeaderBB) {
2630 Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);
2631 IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));
2641 "Should only get equality predicates here.");
2652 if (FalseBB != LoopHeaderBB) {
2663 if (ValShifted->
getOpcode() == Instruction::AShr &&
2727bool LoopIdiomRecognize::recognizeShiftUntilZero() {
2728 bool MadeChange =
false;
2734 const SCEV *ExtraOffsetExpr;
2737 Start, Val, ExtraOffsetExpr, InvertedCond)) {
2739 " shift-until-zero idiom detection failed.\n");
2749 assert(LoopPreheaderBB &&
"There is always a loop preheader.");
2752 assert(SuccessorBB &&
"There is only a single successor.");
2755 Builder.SetCurrentDebugLocation(
IV->getDebugLoc());
2771 " Intrinsic is too costly, not beneficial\n");
2778 bool OffsetIsZero =
false;
2779 if (
auto *ExtraOffsetExprC = dyn_cast<SCEVConstant>(ExtraOffsetExpr))
2780 OffsetIsZero = ExtraOffsetExprC->isZero();
2785 IntrID, Ty, {Val,
Builder.getFalse()},
2786 nullptr, Val->
getName() +
".numleadingzeros");
2789 Val->
getName() +
".numactivebits",
true,
2793 Expander.setInsertPoint(&*
Builder.GetInsertPoint());
2794 Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);
2797 ValNumActiveBits, ExtraOffset, ValNumActiveBits->
getName() +
".offset",
2798 OffsetIsZero,
true);
2799 Value *IVFinal =
Builder.CreateIntrinsic(Intrinsic::smax, {Ty},
2800 {ValNumActiveBitsOffset, Start},
2801 nullptr,
"iv.final");
2803 auto *LoopBackedgeTakenCount = cast<Instruction>(
Builder.CreateSub(
2804 IVFinal, Start, CurLoop->
getName() +
".backedgetakencount",
2805 OffsetIsZero,
true));
2809 Value *LoopTripCount =
2811 CurLoop->
getName() +
".tripcount",
true,
2817 IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);
2822 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->
begin());
2823 auto *CIV =
Builder.CreatePHI(Ty, 2, CurLoop->
getName() +
".iv");
2829 true, Bitwidth != 2);
2832 auto *CIVCheck =
Builder.CreateICmpEQ(CIVNext, LoopTripCount,
2833 CurLoop->
getName() +
".ivcheck");
2834 auto *NewIVCheck = CIVCheck;
2836 NewIVCheck =
Builder.CreateNot(CIVCheck);
2837 NewIVCheck->takeName(ValShiftedIsZero);
2841 auto *IVDePHId =
Builder.CreateAdd(CIV, Start,
"",
false,
2843 IVDePHId->takeName(
IV);
2847 Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
2852 CIV->addIncoming(CIVNext, LoopHeaderBB);
2860 IV->replaceAllUsesWith(IVDePHId);
2861 IV->eraseFromParent();
2870 ++NumShiftUntilZero;
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
This file defines the DenseMap class.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &Ignored)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static Value * matchCondition(BranchInst *BI, BasicBlock *LoopEntry, bool JmpOnZero=false)
Check if the given conditional branch is based on the comparison between a variable and zero,...
static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)
static cl::opt< bool, true > DisableLIRPMemset("disable-" DEBUG_TYPE "-memset", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memset."), cl::location(DisableLIRP::Memset), cl::init(false), cl::ReallyHidden)
static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling" "with -Os/-Oz"), cl::init(true), cl::Hidden)
static CallInst * createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck, Intrinsic::ID IID)
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, Value *&BitMask, Value *&BitPos, Value *&CurrX, Instruction *&NextX)
Return true if the idiom is detected in the loop.
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)
Return true iff the idiom is detected in the loop.
static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)
getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset_patte...
static cl::opt< bool, true > DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memcpy."), cl::location(DisableLIRP::Memcpy), cl::init(false), cl::ReallyHidden)
static CallInst * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)
static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute the number of bytes as a SCEV from the backedge taken count.
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)
Return true if the idiom is detected in the loop.
static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, ScalarEvolution *SE)
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)
match_LoopInvariant< Ty > m_LoopInvariant(const Ty &M, const Loop *L)
Matches if the value is loop-invariant.
static cl::opt< bool, true > DisableLIRPAll("disable-" DEBUG_TYPE "-all", cl::desc("Options to disable Loop Idiom Recognize Pass."), cl::location(DisableLIRP::All), cl::init(false), cl::ReallyHidden)
static void deleteDeadInstruction(Instruction *I)
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
This header defines various interfaces for pass management in LLVM.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
verify safepoint Safepoint IR Verifier
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static const uint32_t IV[8]
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
unsigned getBitWidth() const
Return the number of bits in the APInt.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const Instruction & front() const
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLE
signed less or equal
@ ICMP_UGT
unsigned greater than
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, OGT -> OLE, OLT -> OGE,...
Predicate getPredicate() const
Return the predicate for this instruction.
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getExactLogBase2(Constant *C)
If C is a scalar/fixed width vector of known powers of 2, then this function returns a new scalar/fix...
This is the shared class of boolean and integer constants.
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static ConstantInt * getBool(LLVMContext &Context, bool V)
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string in and methods for querying it.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
void setUnnamedAddr(UnnamedAddr Val)
Module * getParent()
Get the module that this global value is contained inside of...
@ PrivateLinkage
Like Internal, but omit from symbol table.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
BasicBlock * GetInsertBlock() const
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
const BasicBlock * getParent() const
const Function * getFunction() const
Return the function this instruction belongs to.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Instruction * getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
block_iterator block_begin() const
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
StringRef getName() const
This class implements a map that also provides access to all stored values in a deterministic order.
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
Representation for a specific memory location.
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
A Module instance is used to store all the information related to an LLVM module.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
Helper to remove instructions inserted during SCEV expansion, unless they are marked as used.
This class uses information about analyze scalars to rewrite expressions in canonical form.
const SCEV * getOperand(unsigned i) const
This class represents an analyzed expression in the program.
bool isOne() const
Return true if the expression is a constant one.
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
A vector that has set insertion semantics.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop; checks the loop body & header for the possibility of may-throw exc...
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
Value handle that is nullable, but tries to track the Value.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eq/ne/...) to Threshold.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
DiagnosticInfoOptimizationBase::setExtraArgs setExtraArgs
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
bool inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name, const TargetLibraryInfo &TLI)
Analyze the name and prototype of the given function and set any applicable attributes.
bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a...
bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isModOrRefSet(const ModRefInfo MRI)
FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI, LibFunc TheLibFunc, FunctionType *T, AttributeList AttributeList)
Calls getOrInsertFunction() and then makes sure to add mandatory argument attributes.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
@ ModRef
The access may reference and may modify the value stored in memory.
@ Mod
The access may modify the value stored in memory.
bool VerifyMemorySSA
Enables verification of MemorySSA.
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Returns true if the give value is known to be non-negative.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, Value *&X, APInt &Mask, bool LookThroughTrunc=true)
Decompose an icmp into the form ((X & Mask) pred 0) if possible.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * TBAAStruct
The tag for type-based alias analysis (tbaa struct).
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * TBAA
The tag for type-based alias analysis.
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
MDNode * NoAlias
The tag specifying the noalias scope.
AAMDNodes extendTo(ssize_t Len) const
Create a new AAMDNode that describes this AAMDNode after extending it to apply to a series of bytes o...
This struct is a compact representation of a valid (non-zero power of two) alignment.
static bool Memset
When true, Memset is disabled.
static bool All
When true, the entire pass is disabled.
static bool Memcpy
When true, Memcpy is disabled.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Match loop-invariant value.
match_LoopInvariant(const SubPattern_t &SP, const Loop *L)