95 #include "llvm/IR/IntrinsicsPowerPC.h"
114 #define DEBUG_TYPE "ppc-loop-instr-form-prep"
116 using namespace llvm;
121 cl::desc(
"Potential common base number threshold per function "
122 "for PPC loop prep"));
126 cl::desc(
"prefer update form when ds form is also a update form"));
130 cl::desc(
"prepare update form when the load/store increment is a loop "
131 "invariant non-const value."));
135 cl::desc(
"Enable chain commoning in PPC loop prepare pass."));
142 cl::desc(
"Potential PHI threshold per loop for PPC loop prep of update "
147 cl::desc(
"Potential PHI threshold per loop for PPC loop prep of DS form"));
151 cl::desc(
"Potential PHI threshold per loop for PPC loop prep of DQ form"));
162 cl::desc(
"Bucket number per loop for PPC loop chain common"));
170 cl::desc(
"Minimal common base load/store instructions triggering DS/DQ form "
175 cl::desc(
"Minimal common base load/store instructions triggering chain "
176 "commoning preparation. Must be not smaller than 4"));
178 STATISTIC(PHINodeAlreadyExistsUpdate,
"PHI node already in pre-increment form");
179 STATISTIC(PHINodeAlreadyExistsDS,
"PHI node already in DS form");
180 STATISTIC(PHINodeAlreadyExistsDQ,
"PHI node already in DQ form");
181 STATISTIC(DSFormChainRewritten,
"Num of DS form chain rewritten");
182 STATISTIC(DQFormChainRewritten,
"Num of DQ form chain rewritten");
183 STATISTIC(UpdFormChainRewritten,
"Num of update form chain rewritten");
184 STATISTIC(ChainCommoningRewritten,
"Num of commoning chains");
187 struct BucketElement {
197 : BaseSCEV(
B), Elements(1, BucketElement(
I)) {
202 const SCEV *BaseSCEV;
220 enum PrepForm { UpdateForm = 1, DSForm = 4, DQForm = 16, ChainCommoning };
250 bool HasCandidateForPrepare;
256 unsigned SuccPrepCount;
258 bool runOnLoop(
Loop *L);
262 const SCEV *BasePtrStartSCEV,
263 const SCEV *BasePtrIncSCEV, PrepForm
Form);
267 const SCEV *BasePtrIncSCEV);
273 bool prepareBasesForCommoningChains(Bucket &BucketChain);
277 rewriteLoadStoresForCommoningChains(
Loop *L, Bucket &Bucket,
287 unsigned MaxCandidateNum);
294 unsigned MaxCandidateNum);
309 bool prepareBaseForDispFormChain(Bucket &BucketChain, PrepForm
Form);
316 bool prepareBaseForUpdateFormChain(Bucket &BucketChain);
320 bool rewriteLoadStores(
Loop *L, Bucket &BucketChain,
325 std::pair<Instruction *, Instruction *>
333 rewriteForBucketElement(std::pair<Instruction *, Instruction *>
Base,
334 const BucketElement &Element,
Value *OffToBase,
341 static const char *
name =
"Prepare loop for ppc preferred instruction forms";
353 return new PPCLoopInstrFormPrep(
TM);
357 Value *StrippedBasePtr = BasePtr;
358 while (
BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))
359 StrippedBasePtr = BC->getOperand(0);
361 return GEP->isInBounds();
367 assert(
I &&
"Invalid paramater!");
369 return (
I->getName() + Suffix).str();
375 Type **PtrElementType =
nullptr) {
377 Value *PtrValue =
nullptr;
378 Type *PointerElementType =
nullptr;
380 if (
LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
381 PtrValue = LMemI->getPointerOperand();
382 PointerElementType = LMemI->getType();
383 }
else if (
StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
384 PtrValue = SMemI->getPointerOperand();
385 PointerElementType = SMemI->getValueOperand()->getType();
386 }
else if (
IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
389 IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {
390 PtrValue = IMemI->getArgOperand(0);
391 }
else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {
392 PtrValue = IMemI->getArgOperand(1);
397 *PtrElementType = PointerElementType;
406 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
407 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
408 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
409 DT = DTWP ? &DTWP->getDomTree() :
nullptr;
410 PreserveLCSSA = mustPreserveAnalysisID(
LCSSAID);
411 ST =
TM ?
TM->getSubtargetImpl(
F) :
nullptr;
414 bool MadeChange =
false;
418 MadeChange |= runOnLoop(L);
429 bool PPCLoopInstrFormPrep::prepareBasesForCommoningChains(Bucket &CBucket) {
446 "Thredhold can not be smaller than 4!\n");
452 const SCEV *FirstOffset = CBucket.Elements[1].Offset;
457 unsigned FirstOffsetReusedCount = 1;
461 unsigned FirstOffsetReusedCountInFirstChain = 1;
463 unsigned EleNum = CBucket.Elements.size();
464 bool SawChainSeparater =
false;
465 for (
unsigned j = 2;
j != EleNum; ++
j) {
467 CBucket.Elements[
j - 1].Offset) == FirstOffset) {
468 if (!SawChainSeparater)
469 FirstOffsetReusedCountInFirstChain++;
470 FirstOffsetReusedCount++;
488 SawChainSeparater =
true;
492 if (FirstOffsetReusedCount == 1)
496 FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain;
500 if (!SawChainSeparater)
501 ChainNum = (unsigned)sqrt((
double)EleNum);
503 CBucket.ChainSize = (unsigned)(EleNum / ChainNum);
507 if (CBucket.ChainSize * ChainNum != EleNum)
510 if (SawChainSeparater) {
512 for (
unsigned i = 1;
i < CBucket.ChainSize;
i++)
513 for (
unsigned j = 1;
j < ChainNum;
j++)
514 if (CBucket.Elements[
i].Offset !=
515 SE->
getMinusSCEV(CBucket.Elements[
i +
j * CBucket.ChainSize].Offset,
516 CBucket.Elements[
j * CBucket.ChainSize].Offset))
520 for (
unsigned i = 0;
i < ChainNum;
i++)
521 CBucket.ChainBases.push_back(CBucket.Elements[
i * CBucket.ChainSize]);
528 bool PPCLoopInstrFormPrep::chainCommoning(
Loop *L,
530 bool MadeChange =
false;
537 for (
auto &Bucket : Buckets) {
538 if (prepareBasesForCommoningChains(Bucket))
539 MadeChange |= rewriteLoadStoresForCommoningChains(L, Bucket, BBChanged);
543 for (
auto *
BB : BBChanged)
548 bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(
550 bool MadeChange =
false;
552 assert(Bucket.Elements.size() ==
553 Bucket.ChainBases.size() * Bucket.ChainSize &&
554 "invalid bucket for chain commoning!\n");
561 "loopprepare-chaincommon");
563 for (
unsigned ChainIdx = 0; ChainIdx < Bucket.ChainBases.size(); ++ChainIdx) {
564 unsigned BaseElemIdx = Bucket.ChainSize * ChainIdx;
565 const SCEV *BaseSCEV =
567 Bucket.Elements[BaseElemIdx].Offset)
569 const SCEVAddRecExpr *BasePtrSCEV = cast<SCEVAddRecExpr>(BaseSCEV);
576 "Invalid SCEV type for the base ptr for a candidate chain!\n");
578 std::pair<Instruction *, Instruction *>
Base = rewriteForBase(
579 L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr,
580 false , ChainCommoning, SCEVE, DeletedPtrs);
590 for (
unsigned Idx = BaseElemIdx + 1; Idx < BaseElemIdx + Bucket.ChainSize;
592 BucketElement &
I = Bucket.Elements[Idx];
594 assert(Ptr &&
"No pointer operand");
595 if (NewPtrs.
count(Ptr))
598 const SCEV *OffsetSCEV =
599 BaseElemIdx ? SE->
getMinusSCEV(Bucket.Elements[Idx].Offset,
600 Bucket.Elements[BaseElemIdx].Offset)
601 : Bucket.Elements[Idx].Offset;
609 Value *OffsetValue = SCEVE.expandCodeFor(
612 Instruction *NewPtr = rewriteForBucketElement(
Base, Bucket.Elements[Idx],
613 OffsetValue, DeletedPtrs);
615 assert(NewPtr &&
"Wrong rewrite!\n");
619 ++ChainCommoningRewritten;
626 for (
auto *Ptr : DeletedPtrs) {
627 if (
Instruction *IDel = dyn_cast<Instruction>(Ptr))
628 BBChanged.
insert(IDel->getParent());
648 std::pair<Instruction *, Instruction *>
654 LLVM_DEBUG(
dbgs() <<
"PIP: Transforming: " << *BasePtrSCEV <<
"\n");
656 assert(BasePtrSCEV->
getLoop() == L &&
"AddRec for the wrong loop?");
659 assert(BasePtr &&
"No pointer operand");
664 BasePtr->getType()->getPointerAddressSpace());
666 bool IsConstantInc =
false;
668 Value *IncNode = getNodeForInc(L, BaseMemI, BasePtrIncSCEV);
671 dyn_cast<SCEVConstant>(BasePtrIncSCEV);
672 if (BasePtrIncConstantSCEV)
673 IsConstantInc =
true;
677 LLVM_DEBUG(
dbgs() <<
"Loop Increasement can not be represented!\n");
678 return std::make_pair(
nullptr,
nullptr);
684 <<
"Update form prepare for non-const increment is not enabled!\n");
685 return std::make_pair(
nullptr,
nullptr);
688 const SCEV *BasePtrStartSCEV =
nullptr;
691 "Increment is not loop invariant!\n");
693 IsConstantInc ? BasePtrIncConstantSCEV
696 BasePtrStartSCEV = BasePtrSCEV->
getStart();
698 if (alreadyPrepared(L, BaseMemI, BasePtrStartSCEV, BasePtrIncSCEV,
Form)) {
700 return std::make_pair(
nullptr,
nullptr);
703 LLVM_DEBUG(
dbgs() <<
"PIP: New start is: " << *BasePtrStartSCEV <<
"\n");
706 unsigned HeaderLoopPredCount =
pred_size(Header);
719 if (PI != LoopPredecessor)
722 NewPHI->
addIncoming(BasePtrStart, LoopPredecessor);
732 cast<GetElementPtrInst>(PtrInc)->setIsInBounds(
IsPtrInBounds(BasePtr));
734 if (PI == LoopPredecessor)
749 if (PI == LoopPredecessor)
759 cast<GetElementPtrInst>(PtrInc)->setIsInBounds(
IsPtrInBounds(BasePtr));
772 BasePtr->replaceAllUsesWith(NewBasePtr);
774 DeletedPtrs.
insert(BasePtr);
776 return std::make_pair(NewBasePtr, PtrInc);
779 Instruction *PPCLoopInstrFormPrep::rewriteForBucketElement(
780 std::pair<Instruction *, Instruction *>
Base,
const BucketElement &Element,
784 assert((NewBasePtr && PtrInc) &&
"base does not exist!\n");
789 assert(Ptr &&
"No pointer operand");
792 if (!Element.Offset ||
793 (isa<SCEVConstant>(Element.Offset) &&
794 cast<SCEVConstant>(Element.Offset)->getValue()->isZero())) {
795 RealNewPtr = NewBasePtr;
798 if (PtrIP && isa<Instruction>(NewBasePtr) &&
801 else if (PtrIP && isa<PHINode>(PtrIP))
804 PtrIP = Element.Instr;
806 assert(OffToBase &&
"There should be an offset for non base element!\n");
808 I8Ty, PtrInc, OffToBase,
822 ReplNewPtr = RealNewPtr;
830 void PPCLoopInstrFormPrep::addOneCandidate(
834 "Candidate should be a memory instruction.");
835 assert(LSCEV &&
"Invalid SCEV for Ptr value.");
837 bool FoundBucket =
false;
838 for (
auto &
B : Buckets) {
839 if (cast<SCEVAddRecExpr>(
B.BaseSCEV)->getStepRecurrence(*SE) !=
840 cast<SCEVAddRecExpr>(LSCEV)->getStepRecurrence(*SE))
843 if (isValidDiff(Diff)) {
844 B.Elements.push_back(BucketElement(Diff, MemI));
851 if (Buckets.size() == MaxCandidateNum) {
852 LLVM_DEBUG(
dbgs() <<
"Can not prepare more chains, reach maximum limit "
853 << MaxCandidateNum <<
"\n");
856 Buckets.push_back(Bucket(LSCEV, MemI));
868 for (
auto &J : *
BB) {
869 Value *PtrValue =
nullptr;
870 Type *PointerElementType =
nullptr;
884 if (!LARSCEV || LARSCEV->
getLoop() != L)
888 HasCandidateForPrepare =
true;
890 if (isValidCandidate(&J, PtrValue, PointerElementType))
891 addOneCandidate(&J, LSCEV, Buckets, isValidDiff, MaxCandidateNum);
896 bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
906 for (
unsigned j = 0, je = BucketChain.Elements.size();
j != je; ++
j) {
907 if (!BucketChain.Elements[
j].Offset)
908 RemainderOffsetInfo[0] = std::make_pair(0, 1);
910 unsigned Remainder = cast<SCEVConstant>(BucketChain.Elements[
j].Offset)
913 if (RemainderOffsetInfo.
find(Remainder) == RemainderOffsetInfo.
end())
914 RemainderOffsetInfo[Remainder] = std::make_pair(
j, 1);
916 RemainderOffsetInfo[Remainder].second++;
934 unsigned MaxCountRemainder = 0;
935 for (
unsigned j = 0;
j < (unsigned)
Form;
j++)
936 if ((RemainderOffsetInfo.
find(
j) != RemainderOffsetInfo.
end()) &&
937 RemainderOffsetInfo[
j].second >
938 RemainderOffsetInfo[MaxCountRemainder].second)
939 MaxCountRemainder =
j;
947 if (MaxCountRemainder == 0)
952 BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first].Offset;
953 BucketChain.BaseSCEV = SE->
getAddExpr(BucketChain.BaseSCEV, Offset);
954 for (
auto &
E : BucketChain.Elements) {
956 E.Offset = cast<SCEVConstant>(SE->
getMinusSCEV(
E.Offset, Offset));
961 std::swap(BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first],
962 BucketChain.Elements[0]);
972 bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
982 for (
int j = 0, je = BucketChain.Elements.size();
j != je; ++
j) {
983 if (
auto *II = dyn_cast<IntrinsicInst>(BucketChain.Elements[
j].Instr))
993 if (!BucketChain.Elements[
j].Offset ||
994 cast<SCEVConstant>(BucketChain.Elements[
j].Offset)->isZero())
997 const SCEV *
Offset = BucketChain.Elements[
j].Offset;
998 BucketChain.BaseSCEV = SE->
getAddExpr(BucketChain.BaseSCEV, Offset);
999 for (
auto &
E : BucketChain.Elements) {
1001 E.Offset = cast<SCEVConstant>(SE->
getMinusSCEV(
E.Offset, Offset));
1006 std::swap(BucketChain.Elements[
j], BucketChain.Elements[0]);
1012 bool PPCLoopInstrFormPrep::rewriteLoadStores(
1015 bool MadeChange =
false;
1018 cast<SCEVAddRecExpr>(BucketChain.BaseSCEV);
1029 "loopprepare-formrewrite");
1034 bool CanPreInc = (
Form == UpdateForm ||
1035 ((
Form == DSForm) &&
1042 std::pair<Instruction *, Instruction *>
Base =
1043 rewriteForBase(L, BasePtrSCEV, BucketChain.Elements.begin()->Instr,
1044 CanPreInc,
Form, SCEVE, DeletedPtrs);
1054 for (
auto I = std::next(BucketChain.Elements.begin()),
1055 IE = BucketChain.Elements.end();
I !=
IE; ++
I) {
1057 assert(Ptr &&
"No pointer operand");
1058 if (NewPtrs.
count(Ptr))
1063 I->Offset ? cast<SCEVConstant>(
I->Offset)->getValue() :
nullptr,
1065 assert(NewPtr &&
"wrong rewrite!\n");
1073 for (
auto *Ptr : DeletedPtrs) {
1074 if (
Instruction *IDel = dyn_cast<Instruction>(Ptr))
1075 BBChanged.
insert(IDel->getParent());
1083 if (
Form == DSForm && !CanPreInc)
1084 DSFormChainRewritten++;
1085 else if (
Form == DQForm)
1086 DQFormChainRewritten++;
1087 else if (
Form == UpdateForm || (
Form == DSForm && CanPreInc))
1088 UpdFormChainRewritten++;
1093 bool PPCLoopInstrFormPrep::updateFormPrep(
Loop *L,
1095 bool MadeChange =
false;
1096 if (Buckets.empty())
1099 for (
auto &Bucket : Buckets)
1102 if (prepareBaseForUpdateFormChain(Bucket))
1103 MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, UpdateForm);
1106 for (
auto *
BB : BBChanged)
1111 bool PPCLoopInstrFormPrep::dispFormPrep(
Loop *L,
1114 bool MadeChange =
false;
1116 if (Buckets.empty())
1120 for (
auto &Bucket : Buckets) {
1123 if (prepareBaseForDispFormChain(Bucket,
Form))
1124 MadeChange |= rewriteLoadStores(L, Bucket, BBChanged,
Form);
1128 for (
auto *
BB : BBChanged)
1142 const SCEV *BasePtrIncSCEV) {
1145 if (isa<SCEVConstant>(BasePtrIncSCEV))
1146 return cast<SCEVConstant>(BasePtrIncSCEV)->getValue();
1163 for (
auto &CurrentPHI : PHIIter) {
1164 PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
1165 if (!CurrentPHINode)
1173 const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
1174 if (!PHIBasePtrSCEV)
1179 if (!PHIBasePtrIncSCEV || (PHIBasePtrIncSCEV != BasePtrIncSCEV))
1186 Value *StrippedBaseI =
I;
1187 while (
BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBaseI))
1188 StrippedBaseI = BC->getOperand(0);
1190 Instruction *StrippedI = dyn_cast<Instruction>(StrippedBaseI);
1197 (StrippedI->
getOpcode() == Instruction::GetElementPtr &&
1213 bool PPCLoopInstrFormPrep::alreadyPrepared(
Loop *L,
Instruction *MemI,
1214 const SCEV *BasePtrStartSCEV,
1215 const SCEV *BasePtrIncSCEV,
1224 if (!PredBB || !LatchBB)
1229 for (
auto & CurrentPHI : PHIIter) {
1230 PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
1231 if (!CurrentPHINode)
1239 const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
1240 if (!PHIBasePtrSCEV)
1245 if (!PHIBasePtrIncSCEV)
1253 if (PHIBasePtrIncSCEV == BasePtrIncSCEV) {
1256 if ((
Form == UpdateForm ||
Form == ChainCommoning ) &&
1257 PHIBasePtrSCEV->
getStart() == BasePtrStartSCEV) {
1258 ++PHINodeAlreadyExistsUpdate;
1261 if (
Form == DSForm ||
Form == DQForm) {
1266 ++PHINodeAlreadyExistsDS;
1268 ++PHINodeAlreadyExistsDQ;
1279 bool PPCLoopInstrFormPrep::runOnLoop(
Loop *L) {
1280 bool MadeChange =
false;
1296 if (!LoopPredecessor ||
1299 if (LoopPredecessor)
1302 if (!LoopPredecessor) {
1303 LLVM_DEBUG(
dbgs() <<
"PIP fails since no predecessor for current loop.\n");
1309 const Type *PointerElementType) {
1310 assert((PtrValue &&
I) &&
"Invalid parameter!");
1315 auto *II = dyn_cast<IntrinsicInst>(
I);
1316 if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) ||
1317 II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp))
1327 if (!LARSCEV || LARSCEV->
getLoop() != L)
1331 const APInt &ConstInt = StepConst->getValue()->getValue();
1341 const Type *PointerElementType) {
1342 assert((PtrValue &&
I) &&
"Invalid parameter!");
1343 if (isa<IntrinsicInst>(
I))
1350 [](
const User *U) { return isa<SExtInst>(U); }));
1355 const Type *PointerElementType) {
1356 assert((PtrValue &&
I) &&
"Invalid parameter!");
1358 auto *II = dyn_cast<IntrinsicInst>(
I);
1360 return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp ||
1361 II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp;
1363 return ST &&
ST->hasP9Vector() && (PointerElementType->
isVectorTy());
1370 const Type *PointerElementType) {
1382 if (isa<SCEVUnknown>(Start) && Start->getType()->isPointerTy())
1385 const SCEVAddExpr *ASCEV = dyn_cast<SCEVAddExpr>(Start);
1393 bool SawPointer =
false;
1395 if (
Op->getType()->isPointerTy()) {
1399 }
else if (!
Op->getType()->isIntegerTy())
1408 auto isValidConstantDiff = [](
const SCEV *Diff) {
1409 return dyn_cast<SCEVConstant>(Diff) !=
nullptr;
1414 auto isValidChainCommoningDiff = [](
const SCEV *Diff) {
1415 assert(Diff &&
"Invalid Diff!\n");
1418 if (isa<SCEVConstant>(Diff))
1425 const SCEVNAryExpr *ADiff = dyn_cast<SCEVNAryExpr>(Diff);
1430 if (!
Op->getType()->isIntegerTy())
1436 HasCandidateForPrepare =
false;
1445 if (!UpdateFormBuckets.empty())
1446 MadeChange |= updateFormPrep(L, UpdateFormBuckets);
1447 else if (!HasCandidateForPrepare) {
1450 <<
"No prepare candidates found, stop praparation for current loop!\n");
1462 if (!DSFormBuckets.empty())
1463 MadeChange |= dispFormPrep(L, DSFormBuckets, DSForm);
1472 if (!DQFormBuckets.empty())
1473 MadeChange |= dispFormPrep(L, DQFormBuckets, DQForm);
1485 collectCandidates(L, isChainCommoningCandidate, isValidChainCommoningDiff,
1489 if (!Buckets.empty())
1490 MadeChange |= chainCommoning(L, Buckets);