#include <type_traits>

using namespace llvm;

#define DEBUG_TYPE "load-store-vectorizer"

STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");

namespace {

// Equivalence class key: loads/stores are only considered for the same chain
// if they agree on all four components.
using EqClassKey =
    std::tuple<const Value * /* result of getUnderlyingObject() */,
               unsigned /* AddrSpace */,
               unsigned /* Load/Store element size bits */,
               char /* IsLoad; char b/c bool can't be a DenseMap key */>;

[[maybe_unused]] llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                                               const EqClassKey &K) {
  const auto &[UnderlyingObject, AddrSpace, ElementSize, IsLoad] = K;
  OS << (IsLoad ? "load" : "store") << " of " << *UnderlyingObject
     << " of element size " << ElementSize << " bits in addrspace "
     << AddrSpace;
  return OS;
}

// A Chain is a set of loads or stores (never a mix) that all access the same
// underlying object.  Each element records its byte offset from the chain's
// leader, the first instruction of the chain in BB order.
struct ChainElem {
  Instruction *Inst;
  APInt OffsetFromLeader;
};
using Chain = SmallVector<ChainElem, 1>;
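// Illustrative sketch (not from the original source): for the IR
//
//   %l0 = load i32, ptr %p
//   %p1 = getelementptr i8, ptr %p, i64 4
//   %l1 = load i32, ptr %p1
//
// the two loads form one chain with %l0 as the leader; the elements carry
// OffsetFromLeader values of 0 and 4 bytes respectively.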
void sortChainInBBOrder(Chain &C) {
  sort(C, [](auto &A, auto &B) { return A.Inst->comesBefore(B.Inst); });
}
void sortChainInOffsetOrder(Chain &C) {
  sort(C, [](const auto &A, const auto &B) {
    if (A.OffsetFromLeader != B.OffsetFromLeader)
      return A.OffsetFromLeader.slt(B.OffsetFromLeader);
    return A.Inst->comesBefore(B.Inst); // stable tiebreaker
  });
}
[[maybe_unused]] void dumpChain(ArrayRef<ChainElem> C) {
  for (const auto &E : C) {
    dbgs() << "  " << *E.Inst << " (offset " << E.OffsetFromLeader << ")\n";
  }
}
using EquivalenceClassMap =
    MapVector<EqClassKey, SmallVector<Instruction *, 8>>;

// FIXME: Assuming stack alignment of 4 is always good enough.
constexpr unsigned StackAdjustedAlignment = 4;
Instruction *propagateMetadata(Instruction *I, const Chain &C) {
  SmallVector<Value *, 8> Values;
  for (const ChainElem &E : C)
    Values.push_back(E.Inst);
  return propagateMetadata(I, Values);
}
bool isInvariantLoad(const Instruction *I) {
  const LoadInst *LI = dyn_cast<LoadInst>(I);
  return LI != nullptr && LI->hasMetadata(LLVMContext::MD_invariant_load);
}
/// Reorders the instructions that I depends on (the instructions defining its
/// operands), to ensure they dominate I.
void reorder(Instruction *I) {
  SmallPtrSet<Instruction *, 16> InstructionsToMove;
  SmallVector<Instruction *, 16> Worklist;

  Worklist.push_back(I);
  while (!Worklist.empty()) {
    Instruction *IW = Worklist.pop_back_val();
    int NumOperands = IW->getNumOperands();
    for (int i = 0; i < NumOperands; i++) {
      Instruction *IM = dyn_cast<Instruction>(IW->getOperand(i));
      if (!IM || IM->getOpcode() == Instruction::PHI)
        continue;

      // If IM is in another BB, no need to move it, because this pass only
      // vectorizes instructions within one BB.
      if (IM->getParent() != I->getParent())
        continue;

      assert(IM != I && "Unexpected cycle while re-ordering instructions");

      if (!IM->comesBefore(I)) {
        InstructionsToMove.insert(IM);
        Worklist.push_back(IM);
      }
    }
  }

  // All instructions to move should follow I.  Start from I, not from begin().
  for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;) {
    Instruction *IM = &*(BBI++);
    if (!InstructionsToMove.count(IM))
      continue;
    IM->moveBefore(I);
  }
}
class Vectorizer {
  Function &F;
  AliasAnalysis &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
  ScalarEvolution &SE;
  TargetTransformInfo &TTI;
  const DataLayout &DL;
  IRBuilder<> Builder;

  // The scalar loads/stores replaced by vectorized accesses; erased in bulk
  // at the end of run().
  SmallVector<Instruction *, 128> ToErase;

public:
  Vectorizer(Function &F, AliasAnalysis &AA, AssumptionCache &AC,
             DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
      : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
        DL(F.getDataLayout()), Builder(SE.getContext()) {}

  bool run();

private:
  static const unsigned MaxDepth = 3;
  /// Runs the vectorizer on [Begin, End), a range within one basic block that
  /// contains no barrier instructions.
  bool runOnPseudoBB(BasicBlock::iterator Begin, BasicBlock::iterator End);

  /// Runs the vectorizer on one equivalence class of loads/stores.
  bool runOnEquivalenceClass(const EqClassKey &EqClassKey,
                             ArrayRef<Instruction *> EqClass);

  /// Runs the vectorizer on one chain.
  bool runOnChain(Chain &C);

  /// Splits the chain into subchains of instructions which read/write a
  /// contiguous block of memory.  Discards any length-1 subchains.
  std::vector<Chain> splitChainByContiguity(Chain &C);

  /// Splits the chain into subchains where it's safe to hoist loads up to the
  /// beginning of the subchain and to sink stores down to its end.  Discards
  /// any length-1 subchains.
  std::vector<Chain> splitChainByMayAliasInstrs(Chain &C);

  /// Splits the chain into subchains that make legal, aligned accesses.
  /// Discards any length-1 subchains.
  std::vector<Chain> splitChainByAlignment(Chain &C);

  /// Converts the instrs in the chain into a single vectorized load or store.
  /// Adds the old scalar loads/stores to ToErase.
  bool vectorizeChain(Chain &C);

  /// Tries to compute the offset in bytes PtrB - PtrA.
  std::optional<APInt> getConstantOffset(Value *PtrA, Value *PtrB,
                                         Instruction *ContextInst,
                                         unsigned Depth = 0);
  std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
                                                     Instruction *ContextInst,
                                                     unsigned Depth);
  std::optional<APInt> getConstantOffsetSelects(Value *PtrA, Value *PtrB,
                                                Instruction *ContextInst,
                                                unsigned Depth);

  /// Gets the element type of the vector that the chain will load or store.
  Type *getChainElemTy(const Chain &C);

  /// Determines whether ChainElem can be moved up (if IsLoadChain) or down
  /// (if not IsLoadChain) to ChainBegin -- i.e. there are no intervening
  /// may-alias instructions.
  template <bool IsLoadChain>
  bool isSafeToMove(
      Instruction *ChainElem, Instruction *ChainBegin,
      const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets);

  /// Collects loads and stores grouped by equivalence class: all elements are
  /// loads or all are stores, they share the element size, and they share the
  /// result of getUnderlyingObject().
  EquivalenceClassMap collectEquivalenceClasses(BasicBlock::iterator Begin,
                                                BasicBlock::iterator End);

  /// Partitions Instrs into chains where every instruction has a known
  /// constant offset from the first instruction in the chain.
  std::vector<Chain> gatherChains(ArrayRef<Instruction *> Instrs);
};
class LoadStoreVectorizerLegacyPass : public FunctionPass {
public:
  static char ID;

  LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {
    initializeLoadStoreVectorizerLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "GPU Load and Store Vectorizer";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace
char LoadStoreVectorizerLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                      "Vectorize load and Store instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                    "Vectorize load and Store instructions", false, false)

Pass *llvm::createLoadStoreVectorizerPass() {
  return new LoadStoreVectorizerLegacyPass();
}
bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
    return false;

  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  return Vectorizer(F, AA, AC, DT, SE, TTI).run();
}
PreservedAnalyses LoadStoreVectorizerPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (F.hasFnAttribute(Attribute::NoImplicitFloat))
    return PreservedAnalyses::all();

  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);

  bool Changed = Vectorizer(F, AA, AC, DT, SE, TTI).run();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return Changed ? PA : PreservedAnalyses::all();
}
bool Vectorizer::run() {
  bool Changed = false;
  // Break up each BB at instructions which aren't guaranteed to transfer
  // execution to their successor; loads/stores may not be reordered across
  // such "barriers", so each sub-range is processed independently.
  for (BasicBlock *BB : post_order(&F)) {
    // BB must at least have a terminator.
    assert(!BB->empty());

    SmallVector<BasicBlock::iterator, 8> Barriers;
    Barriers.push_back(BB->begin());
    for (Instruction &I : *BB)
      if (!isGuaranteedToTransferExecutionToSuccessor(&I))
        Barriers.push_back(I.getIterator());
    Barriers.push_back(BB->end());

    for (auto It = Barriers.begin(), End = std::prev(Barriers.end()); It != End;
         ++It)
      Changed |= runOnPseudoBB(*It, *std::next(It));

    for (Instruction *I : ToErase) {
      auto *PtrOperand = getLoadStorePointerOperand(I);
      if (I->use_empty())
        I->eraseFromParent();
      RecursivelyDeleteTriviallyDeadInstructions(PtrOperand);
    }
    ToErase.clear();
  }

  return Changed;
}
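// Illustrative sketch (not from the original source): a call that may not
// return acts as a barrier, so in
//
//   %a = load i32, ptr %p
//   call void @may_exit()
//   %b = load i32, ptr %q
//
// the block is processed as two pseudo-BBs, [%a, @may_exit) and
// [@may_exit, end); %a and %b are never considered for the same chain.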
bool Vectorizer::runOnPseudoBB(BasicBlock::iterator Begin,
                               BasicBlock::iterator End) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on pseudo-BB [" << *Begin << " ... ";
    if (End != Begin->getParent()->end())
      dbgs() << *End;
    else
      dbgs() << "<BB end>";
    dbgs() << ")\n";
  });

  bool Changed = false;
  for (const auto &[EqClassKey, EqClass] :
       collectEquivalenceClasses(Begin, End))
    Changed |= runOnEquivalenceClass(EqClassKey, EqClass);

  return Changed;
}
bool Vectorizer::runOnEquivalenceClass(const EqClassKey &EqClassKey,
                                       ArrayRef<Instruction *> EqClass) {
  bool Changed = false;

  LLVM_DEBUG({
    dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
           << " keyed on " << EqClassKey << ":\n";
    for (Instruction *I : EqClass)
      dbgs() << "  " << *I << "\n";
  });

  std::vector<Chain> Chains = gatherChains(EqClass);
  LLVM_DEBUG(dbgs() << "LSV: Got " << Chains.size()
                    << " nontrivial chains.\n";);
  for (Chain &C : Chains)
    Changed |= runOnChain(C);
  return Changed;
}
bool Vectorizer::runOnChain(Chain &C) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on chain with " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  // Split up the chain into increasingly smaller chains, until we can finally
  // vectorize them.
  //
  // (Don't be scared by the depth of the loop nest here.  These operations
  // are all at worst O(n lg n) in the number of instructions, and splitting
  // chains doesn't change the number of instrs.  So the whole loop nest is
  // O(n lg n).)
  bool Changed = false;
  for (auto &C : splitChainByMayAliasInstrs(C))
    for (auto &C : splitChainByContiguity(C))
      for (auto &C : splitChainByAlignment(C))
        Changed |= vectorizeChain(C);
  return Changed;
}
std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
  if (C.empty())
    return {};

  sortChainInBBOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByMayAliasInstrs considering chain:\n";
    dumpChain(C);
  });

  // We know that elements in the chain with nonoverlapping offsets can't
  // alias, but AA may not be smart enough to figure this out.  Use a
  // hashtable.
  DenseMap<Instruction *, APInt /*OffsetFromLeader*/> ChainOffsets;
  for (const auto &E : C)
    ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});

  // Loads get hoisted up to the first load in the chain.  Stores get sunk
  // down to the last store in the chain.  Our algorithm for loads is:
  //
  //  - Take the first element of the chain.  This is the start of a new chain.
  //  - Take the next element of `C` and check for may-alias instructions
  //    up to the start of NewChain.  If no may-alias instrs, add it to
  //    NewChain.  Otherwise, start a new NewChain.
  //
  // For stores it's the same except in the reverse direction.
  //
  // We expect IsLoad to be an std::bool_constant.
  auto Impl = [&](auto IsLoad) {
    // MSVC is unhappy if IsLoad is a capture, so pass it as an arg.
    auto [ChainBegin, ChainEnd] = [&](auto IsLoad) {
      if constexpr (IsLoad())
        return std::make_pair(C.begin(), C.end());
      else
        return std::make_pair(C.rbegin(), C.rend());
    }(IsLoad);
    assert(ChainBegin != ChainEnd);

    std::vector<Chain> Chains;
    SmallVector<ChainElem, 1> NewChain;
    NewChain.push_back(*ChainBegin);
    for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
      if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst,
                               ChainOffsets)) {
        LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge "
                          << *ChainIt->Inst << " into " << *ChainBegin->Inst
                          << "\n");
        NewChain.push_back(*ChainIt);
      } else {
        LLVM_DEBUG(
            dbgs() << "LSV: Found intervening may-alias instrs; cannot merge "
                   << *ChainIt->Inst << " into " << *ChainBegin->Inst << "\n");
        if (NewChain.size() > 1) {
          LLVM_DEBUG({
            dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
            dumpChain(NewChain);
          });
          Chains.push_back(std::move(NewChain));
        }

        // Start a new chain.
        NewChain = SmallVector<ChainElem, 1>({*ChainIt});
      }
    }
    if (NewChain.size() > 1) {
      LLVM_DEBUG({
        dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
        dumpChain(NewChain);
      });
      Chains.push_back(std::move(NewChain));
    }
    return Chains;
  };

  if (isa<LoadInst>(C[0].Inst))
    return Impl(std::bool_constant<true>());

  assert(isa<StoreInst>(C[0].Inst));
  return Impl(std::bool_constant<false>());
}
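// Illustrative sketch (not from the original source): in
//
//   %a = load i32, ptr %p              ; chain elem, offset 0
//   store i32 0, ptr %unrelated        ; may-alias with %p according to AA
//   %b = load i32, ptr %p1             ; chain elem, offset 4
//
// %b cannot be hoisted up to %a across the store, so the chain is split into
// {%a} and {%b}, and both length-1 pieces are discarded.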
std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByContiguity considering chain:\n";
    dumpChain(C);
  });

  std::vector<Chain> Ret;
  Ret.push_back({C.front()});

  for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
    // `Prev` accesses offsets [PrevDistFromBase, PrevReadEnd).
    auto &CurChain = Ret.back();
    const ChainElem &Prev = CurChain.back();
    unsigned SzBits = DL.getTypeSizeInBits(getLoadStoreType(&*Prev.Inst));
    assert(SzBits % 8 == 0 && "Non-byte sizes should have been filtered out by "
                              "collectEquivalenceClass");
    APInt PrevReadEnd = Prev.OffsetFromLeader + SzBits / 8;

    // Add this instruction to the end of the current chain, or start a new
    // one.
    bool AreContiguous = It->OffsetFromLeader == PrevReadEnd;
    LLVM_DEBUG(dbgs() << "LSV: Instructions are "
                      << (AreContiguous ? "" : "not ") << "contiguous: "
                      << *Prev.Inst << " (ends at offset " << PrevReadEnd
                      << ") -> " << *It->Inst << " (starts at offset "
                      << It->OffsetFromLeader << ")\n");
    if (AreContiguous)
      CurChain.push_back(*It);
    else
      Ret.push_back({*It});
  }

  // Filter out length-1 chains, these are uninteresting.
  llvm::erase_if(Ret, [](const auto &Chain) { return Chain.size() <= 1; });
  return Ret;
}
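// Illustrative sketch (not from the original source): for two i32 elements at
// OffsetFromLeader 0 and 4, PrevReadEnd = 0 + 32/8 = 4, which equals the next
// element's offset, so they are contiguous; if the second element were at
// offset 8 instead, the 4-byte gap would start a new subchain.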
Type *Vectorizer::getChainElemTy(const Chain &C) {
  assert(!C.empty());
  // The rules:
  //  - If there are any pointer types in the chain, use an integer type.
  //  - Prefer an integer type if it appears in the chain.
  //  - The width of the chosen type must equal the width of the other
  //    elements' scalar types (they are all the same, since the chain is one
  //    equivalence class).
  if (any_of(C, [](const ChainElem &E) {
        return getLoadStoreType(E.Inst)->getScalarType()->isPtrOrPtrVectorTy();
      })) {
    return Type::getIntNTy(
        F.getContext(),
        DL.getTypeSizeInBits(getLoadStoreType(C[0].Inst)->getScalarType()));
  }

  for (const ChainElem &E : C)
    if (Type *T = getLoadStoreType(E.Inst)->getScalarType(); T->isIntegerTy())
      return T;
  return getLoadStoreType(C[0].Inst)->getScalarType();
}
std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
  // Greedy algorithm: from each starting element, consider the longest
  // candidate subchain that fits in a vector register and is allowed and fast
  // for the target, then move past it and repeat.
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByAlignment considering chain:\n";
    dumpChain(C);
  });

  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  auto getVectorFactor = [&](unsigned VF, unsigned LoadStoreSize,
                             unsigned ChainSizeBytes, VectorType *VecTy) {
    return IsLoadChain ? TTI.getLoadVectorFactor(VF, LoadStoreSize,
                                                 ChainSizeBytes, VecTy)
                       : TTI.getStoreVectorFactor(VF, LoadStoreSize,
                                                  ChainSizeBytes, VecTy);
  };

#ifndef NDEBUG
  for (const auto &E : C) {
    Type *Ty = getLoadStoreType(E.Inst)->getScalarType();
    assert(isPowerOf2_32(DL.getTypeSizeInBits(Ty)) &&
           "Should have filtered out non-power-of-two elements in "
           "collectEquivalenceClasses.");
  }
#endif
  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned VecRegBytes = TTI.getLoadStoreVecRegBitWidth(AS) / 8;

  std::vector<Chain> Ret;
  for (unsigned CBegin = 0; CBegin < C.size(); ++CBegin) {
    // Find candidate chains of size not greater than the largest vector reg.
    // These chains are over the closed interval [CBegin, CEnd].
    SmallVector<std::pair<unsigned /*CEnd*/, unsigned /*SizeBytes*/>, 8>
        CandidateChains;
    for (unsigned CEnd = CBegin + 1, Size = C.size(); CEnd < Size; ++CEnd) {
      APInt Sz = C[CEnd].OffsetFromLeader +
                 DL.getTypeStoreSize(getLoadStoreType(C[CEnd].Inst)) -
                 C[CBegin].OffsetFromLeader;
      if (Sz.sgt(VecRegBytes))
        break;
      CandidateChains.push_back(
          {CEnd, static_cast<unsigned>(Sz.getLimitedValue())});
    }
    // Consider the longest chain first.
    for (auto It = CandidateChains.rbegin(), End = CandidateChains.rend();
         It != End; ++It) {
      auto [CEnd, SizeBytes] = *It;
      LLVM_DEBUG(
          dbgs() << "LSV: splitChainByAlignment considering candidate chain ["
                 << *C[CBegin].Inst << " ... " << *C[CEnd].Inst << "]\n");

      Type *VecElemTy = getChainElemTy(C);
      // Note, VecElemTy is a power of 2, but might be less than one byte.  For
      // example, we can vectorize 2 x <2 x i4> to <4 x i4>, and in this case
      // VecElemTy would be i4.
      unsigned VecElemBits = DL.getTypeSizeInBits(VecElemTy);

      // SizeBytes and VecElemBits are powers of 2, so they divide evenly.
      assert((8 * SizeBytes) % VecElemBits == 0);
      unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
      FixedVectorType *VecTy = FixedVectorType::get(VecElemTy, NumVecElems);
      unsigned VF = 8 * VecRegBytes / VecElemBits;

      // Check that TTI is happy with this vectorization factor.
      unsigned TargetVF = getVectorFactor(VF, VecElemBits,
                                          VecElemBits * NumVecElems / 8, VecTy);
      if (TargetVF != VF && TargetVF < NumVecElems) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because TargetVF="
                   << TargetVF << " != VF=" << VF
                   << " and TargetVF < NumVecElems=" << NumVecElems << "\n");
        continue;
      }
      // Is a load/store with this alignment allowed by TTI, and at least as
      // fast as an unvectorized load/store?
      //
      // TTI and F are members, so pass them as explicit captures to avoid an
      // implicit capture of `this`.
      auto IsAllowedAndFast = [&, SizeBytes = SizeBytes, &TTI = TTI,
                               &F = F](Align Alignment) {
        if (Alignment.value() % SizeBytes == 0)
          return true;
        unsigned VectorizedSpeed = 0;
        bool AllowsMisaligned = TTI.allowsMisalignedMemoryAccesses(
            F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
        if (!AllowsMisaligned) {
          LLVM_DEBUG(dbgs()
                     << "LSV: Access of " << SizeBytes << "B in addrspace "
                     << AS << " with alignment " << Alignment.value()
                     << " is misaligned, and therefore can't be vectorized.\n");
          return false;
        }

        unsigned ElementwiseSpeed = 0;
        (TTI).allowsMisalignedMemoryAccesses((F).getContext(), VecElemBits, AS,
                                             Alignment, &ElementwiseSpeed);
        if (VectorizedSpeed < ElementwiseSpeed) {
          LLVM_DEBUG(dbgs()
                     << "LSV: Access of " << SizeBytes << "B in addrspace "
                     << AS << " with alignment " << Alignment.value()
                     << " has relative speed " << VectorizedSpeed
                     << ", which is lower than the elementwise speed of "
                     << ElementwiseSpeed
                     << ".  Therefore this access won't be vectorized.\n");
          return false;
        }
        return true;
      };
      // If we're loading/storing from an alloca, align it if possible.  At
      // most we align to StackAdjustedAlignment bytes.
      Align Alignment = getLoadStoreAlignment(C[CBegin].Inst);
      Value *PtrOperand = getLoadStorePointerOperand(C[CBegin].Inst);
      bool IsAllocaAccess = AS == DL.getAllocaAddrSpace() &&
                            isa<AllocaInst>(PtrOperand->stripPointerCasts());
      Align PrefAlign = Align(StackAdjustedAlignment);
      if (IsAllocaAccess && Alignment.value() % SizeBytes != 0 &&
          IsAllowedAndFast(PrefAlign)) {
        Align NewAlign = getOrEnforceKnownAlignment(
            PtrOperand, PrefAlign, DL, C[CBegin].Inst, nullptr, &DT);
        if (NewAlign >= Alignment) {
          LLVM_DEBUG(dbgs()
                     << "LSV: splitByChain upgrading alloca alignment from "
                     << Alignment.value() << " to " << NewAlign.value()
                     << "\n");
          Alignment = NewAlign;
        }
      }
      if (!IsAllowedAndFast(Alignment)) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because its alignment is not AllowedAndFast: "
                   << Alignment.value() << "\n");
        continue;
      }

      if ((IsLoadChain &&
           !TTI.isLegalToVectorizeLoadChain(SizeBytes, Alignment, AS)) ||
          (!IsLoadChain &&
           !TTI.isLegalToVectorizeStoreChain(SizeBytes, Alignment, AS))) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because !isLegalToVectorizeLoad/StoreChain.");
        continue;
      }

      // Hooray, we can vectorize this chain!
      Chain &NewChain = Ret.emplace_back();
      for (unsigned I = CBegin; I <= CEnd; ++I)
        NewChain.push_back(C[I]);
      CBegin = CEnd; // Skip over the instructions we've added to a chain.
      break;
    }
  }
  return Ret;
}
bool Vectorizer::vectorizeChain(Chain &C) {
  if (C.size() < 2)
    return false;

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: Vectorizing chain of " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  Type *VecElemTy = getChainElemTy(C);
  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned ChainBytes = std::accumulate(
      C.begin(), C.end(), 0u, [&](unsigned Bytes, const ChainElem &E) {
        return Bytes + DL.getTypeStoreSize(getLoadStoreType(E.Inst));
      });
  assert(ChainBytes % DL.getTypeStoreSize(VecElemTy) == 0);
  // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be
  // smaller than 1 byte (e.g. VecTy == <32 x i1>).
  Type *VecTy = FixedVectorType::get(
      VecElemTy, 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy));

  Align Alignment = getLoadStoreAlignment(C[0].Inst);
  // If this is a load/store of an alloca, we might have upgraded the alloca's
  // alignment earlier.  Get the new alignment.
  if (AS == DL.getAllocaAddrSpace()) {
    Alignment = std::max(
        Alignment,
        getOrEnforceKnownAlignment(getLoadStorePointerOperand(C[0].Inst),
                                   MaybeAlign(), DL, C[0].Inst, nullptr, &DT));
  }

  // All elements of the chain must have the same scalar-type size.
#ifndef NDEBUG
  for (const ChainElem &E : C)
    assert(DL.getTypeStoreSize(getLoadStoreType(E.Inst)->getScalarType()) ==
           DL.getTypeStoreSize(VecElemTy));
#endif
  Instruction *VecInst;
  if (IsLoadChain) {
    // Loads get hoisted to the location of the first load in the chain.  We
    // may also need to hoist the (transitive) operands of the loads.
    Builder.SetInsertPoint(
        llvm::min_element(C, [](const auto &A, const auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // Chain is in offset order, so C[0] is the instr with the lowest offset,
    // i.e. the root of the vector.
    VecInst = Builder.CreateAlignedLoad(VecTy,
                                        getLoadStorePointerOperand(C[0].Inst),
                                        Alignment);

    unsigned VecIdx = 0;
    for (const ChainElem &E : C) {
      Instruction *I = E.Inst;
      Value *V;
      Type *T = getLoadStoreType(I);
      if (auto *VT = dyn_cast<FixedVectorType>(T)) {
        auto Mask = llvm::to_vector<8>(
            llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));
        V = Builder.CreateShuffleVector(VecInst, Mask, I->getName());
        VecIdx += VT->getNumElements();
      } else {
        V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx),
                                         I->getName());
        ++VecIdx;
      }
      if (V->getType() != I->getType())
        V = Builder.CreateBitOrPointerCast(V, I->getType());
      I->replaceAllUsesWith(V);
    }
    // Finally, reorder the instrs in the BB so that the (transitive) operands
    // of VecInst appear before it.
    reorder(VecInst);
  } else {
    // Stores get sunk to the location of the last store in the chain.
    Builder.SetInsertPoint(
        llvm::max_element(C, [](auto &A, auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // Build the vector to store.
    Value *Vec = PoisonValue::get(VecTy);
    unsigned VecIdx = 0;
    auto InsertElem = [&](Value *V) {
      if (V->getType() != VecElemTy)
        V = Builder.CreateBitOrPointerCast(V, VecElemTy);
      Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx++));
    };
    for (const ChainElem &E : C) {
      auto *I = cast<StoreInst>(E.Inst);
      if (FixedVectorType *VT =
              dyn_cast<FixedVectorType>(getLoadStoreType(I))) {
        for (int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
          InsertElem(Builder.CreateExtractElement(I->getValueOperand(),
                                                  Builder.getInt32(J)));
        }
      } else {
        InsertElem(I->getValueOperand());
      }
    }
    // Chain is in offset order, so C[0] is the instr with the lowest offset,
    // i.e. the root of the vector.
    VecInst = Builder.CreateAlignedStore(
        Vec, getLoadStorePointerOperand(C[0].Inst), Alignment);
  }

  propagateMetadata(VecInst, C);

  for (const ChainElem &E : C)
    ToErase.push_back(E.Inst);

  ++NumVectorInstructions;
  NumScalarsVectorized += C.size();
  return true;
}
template <bool IsLoadChain>
bool Vectorizer::isSafeToMove(
    Instruction *ChainElem, Instruction *ChainBegin,
    const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets) {
  LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
                    << *ChainBegin << ")\n");

  assert(isa<LoadInst>(ChainElem) == IsLoadChain);
  if (ChainElem == ChainBegin)
    return true;

  // Invariant loads can always be reordered; by definition they are not
  // clobbered by stores.
  if (isInvariantLoad(ChainElem))
    return true;

  // Walk from ChainElem towards ChainBegin: backwards for loads (which get
  // hoisted), forwards for stores (which get sunk).
  auto BBIt = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainElem->getIterator());
    else
      return ChainElem->getIterator();
  }());
  auto BBItEnd = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainBegin->getIterator());
    else
      return ChainBegin->getIterator();
  }());

  const APInt &ChainElemOffset = ChainOffsets.at(ChainElem);
  const unsigned ChainElemSize =
      DL.getTypeStoreSize(getLoadStoreType(ChainElem));
  for (; BBIt != BBItEnd; ++BBIt) {
    Instruction *I = &*BBIt;

    if (!I->mayReadOrWriteMemory())
      continue;

    // Loads can be reordered with other loads.
    if (IsLoadChain && isa<LoadInst>(I))
      continue;

    // Stores can be sunk below invariant loads.
    if (!IsLoadChain && isInvariantLoad(I))
      continue;

    // If I is in the chain, we can tell whether it aliases ChainElem by
    // checking what offset I accesses.  This may be better than AA is able
    // to do.
    if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) {
      // I and ChainElem overlap if:
      //   - I and ChainElem have the same offset, OR
      //   - I's offset is less than ChainElem's, but I touches past the
      //     beginning of ChainElem, OR
      //   - ChainElem's offset is less than I's, but ChainElem touches past
      //     the beginning of I.
      const APInt &IOffset = OffsetIt->second;
      unsigned IElemSize = DL.getTypeStoreSize(getLoadStoreType(I));
      if (IOffset == ChainElemOffset ||
          (IOffset.sle(ChainElemOffset) &&
           (IOffset + IElemSize).sgt(ChainElemOffset)) ||
          (ChainElemOffset.sle(IOffset) &&
           (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
        LLVM_DEBUG({
          // Double check that AA also sees this alias.  If not, we probably
          // have a bug.
          ModRefInfo MR = AA.getModRefInfo(I, MemoryLocation::get(ChainElem));
          assert(IsLoadChain ? isModSet(MR) : isModOrRefSet(MR));
          dbgs() << "LSV: Found alias in chain: " << *I << "\n";
        });
        return false; // We found an aliasing instruction; bail.
      }

      continue; // We're confident there's no alias.
    }
    LLVM_DEBUG(dbgs() << "LSV: Querying AA for " << *I << "\n");
    ModRefInfo MR = AA.getModRefInfo(I, MemoryLocation::get(ChainElem));
    if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
      LLVM_DEBUG(dbgs() << "LSV: Found alias in chain:\n"
                        << "  Aliasing instruction:\n"
                        << "    " << *I << '\n'
                        << "  Aliased instruction and pointer:\n"
                        << "    " << *ChainElem << '\n'
                        << "    " << *getLoadStorePointerOperand(ChainElem)
                        << '\n');
      return false;
    }
  }
  return true;
}
static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
                                   unsigned MatchingOpIdxA, Instruction *AddOpB,
                                   unsigned MatchingOpIdxB, bool Signed) {
  LLVM_DEBUG(dbgs() << "LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
                    << ", AddOpA=" << *AddOpA << ", MatchingOpIdxA="
                    << MatchingOpIdxA << ", AddOpB=" << *AddOpB
                    << ", MatchingOpIdxB=" << MatchingOpIdxB
                    << ", Signed=" << Signed << "\n");
  // If both AddOpA and AddOpB are adds with NSW/NUW and with one of the
  // operands being the same, the transformation is safe if we can prove that
  // AddOpA won't overflow when IdxDiff is added to its other operand.  For
  // example:
  //
  //  %tmp7  = add nsw i32 %tmp2, %v0
  //  %tmp8  = sext i32 %tmp7 to i64
  //  ...
  //  %tmp11 = add nsw i32 %v0, 1
  //  %tmp12 = add nsw i32 %tmp2, %tmp11
  //  %tmp13 = sext i32 %tmp12 to i64
  //
  // Both %tmp7 and %tmp12 have the nsw flag and the first operand is %tmp2.
  // It's guaranteed that adding 1 to %tmp7 won't overflow because %tmp11 adds
  // 1 to %v0 and both %tmp11 and %tmp12 have the nsw flag.
  assert(AddOpA->getOpcode() == Instruction::Add &&
         AddOpB->getOpcode() == Instruction::Add &&
         checkNoWrapFlags(AddOpA, Signed) && checkNoWrapFlags(AddOpB, Signed));
  if (AddOpA->getOperand(MatchingOpIdxA) ==
      AddOpB->getOperand(MatchingOpIdxB)) {
    Value *OtherOperandA = AddOpA->getOperand(MatchingOpIdxA == 1 ? 0 : 1);
    Value *OtherOperandB = AddOpB->getOperand(MatchingOpIdxB == 1 ? 0 : 1);
    Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);
    Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);
    // Match `x +nsw/nuw y` and `x +nsw/nuw (y + IdxDiff)`.
    if (OtherInstrB && OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrB->getOperand(0) == OtherOperandA &&
          IdxDiff.getSExtValue() == CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y + -IdxDiff)` and `x +nsw/nuw y`.
    if (OtherInstrA && OtherInstrA->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherOperandB &&
          IdxDiff.getSExtValue() == -CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y + c0)` and `x +nsw/nuw (y + c1)` where
    // c1 - c0 == IdxDiff.
    if (OtherInstrA && OtherInstrB &&
        OtherInstrA->getOpcode() == Instruction::Add &&
        OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1)) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstValA =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      int64_t CstValB =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherInstrB->getOperand(0) &&
          IdxDiff.getSExtValue() == (CstValB - CstValA))
        return true;
    }
  }
  return false;
}
std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
                    << " PtrB=" << *PtrB << " ContextInst=" << *ContextInst
                    << " Depth=" << Depth << "\n");
  auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
  auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
  if (!GEPA || !GEPB)
    return getConstantOffsetSelects(PtrA, PtrB, ContextInst, Depth);

  // Look through GEPs after checking they're the same except for the last
  // index.
  if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
      GEPA->getPointerOperand() != GEPB->getPointerOperand())
    return std::nullopt;
  gep_type_iterator GTIA = gep_type_begin(GEPA);
  gep_type_iterator GTIB = gep_type_begin(GEPB);
  for (unsigned I = 0, E = GEPA->getNumIndices() - 1; I < E; ++I) {
    if (GTIA.getOperand() != GTIB.getOperand())
      return std::nullopt;
    ++GTIA;
    ++GTIB;
  }

  Instruction *OpA = dyn_cast<Instruction>(GTIA.getOperand());
  Instruction *OpB = dyn_cast<Instruction>(GTIB.getOperand());
  if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
      OpA->getType() != OpB->getType())
    return std::nullopt;

  uint64_t Stride = GTIA.getSequentialElementStride(DL);

  // Only look through a ZExt/SExt.
  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
    return std::nullopt;

  bool Signed = isa<SExtInst>(OpA);

  // At this point A could be a function parameter, i.e. not an instruction.
  Value *ValA = OpA->getOperand(0);
  OpB = dyn_cast<Instruction>(OpB->getOperand(0));
  if (!OpB || ValA->getType() != OpB->getType())
    return std::nullopt;

  const SCEV *OffsetSCEVA = SE.getSCEV(ValA);
  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
  const SCEV *IdxDiffSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
  if (IdxDiffSCEV == SE.getCouldNotCompute())
    return std::nullopt;

  ConstantRange IdxDiffRange = SE.getSignedRange(IdxDiffSCEV);
  if (!IdxDiffRange.isSingleElement())
    return std::nullopt;
  APInt IdxDiff = *IdxDiffRange.getSingleElement();

  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
                    << "\n");

  // Now we need to prove that adding IdxDiff to ValA won't overflow.
  bool Safe = false;

  // First attempt: if OpB is an add with NSW/NUW and OpB is IdxDiff added to
  // ValA, we're okay.
  if (OpB->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(OpB->getOperand(1)) &&
      IdxDiff.sle(cast<ConstantInt>(OpB->getOperand(1))->getSExtValue()) &&
      checkNoWrapFlags(OpB, Signed))
    Safe = true;

  // Second attempt: check for eligible add NSW/NUW instruction sequences,
  // accounting for both possible operand orders of each add.
  OpA = dyn_cast<Instruction>(ValA);
  if (!Safe && OpA && OpA->getOpcode() == Instruction::Add &&
      OpB->getOpcode() == Instruction::Add && checkNoWrapFlags(OpA, Signed) &&
      checkNoWrapFlags(OpB, Signed)) {
    for (unsigned MatchingOpIdxA : {0, 1})
      for (unsigned MatchingOpIdxB : {0, 1})
        if (!Safe)
          Safe = checkIfSafeAddSequence(IdxDiff, OpA, MatchingOpIdxA, OpB,
                                        MatchingOpIdxB, Signed);
  }

  unsigned BitWidth = ValA->getType()->getScalarSizeInBits();

  // Third attempt: if all set bits of IdxDiff, or any higher-order bit other
  // than the sign bit, are known to be zero in ValA, we can add IdxDiff to it
  // with no overflow of any sort.  If IdxDiff is negative, swap ValA and OpB.
  if (!Safe) {
    KnownBits Known(BitWidth);
    computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), Known, DL, 0, &AC,
                     ContextInst, &DT);
    APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
    if (Signed)
      BitsAllowedToBeSet.clearBit(BitWidth - 1);
    if (BitsAllowedToBeSet.ult(IdxDiff.abs()))
      return std::nullopt;
    Safe = true;
  }

  if (Safe)
    return IdxDiff * Stride;
  return std::nullopt;
}
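// Illustrative sketch (not from the original source): this handles pairs like
//
//   %i1 = add nsw i32 %i, 1
//   %e0 = sext i32 %i to i64
//   %e1 = sext i32 %i1 to i64
//   %p0 = getelementptr i32, ptr %base, i64 %e0
//   %p1 = getelementptr i32, ptr %base, i64 %e1
//
// where SCEV proves the index difference is the constant 1 and the nsw flag
// rules out overflow, giving an offset of IdxDiff * Stride = 1 * 4 bytes.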
std::optional<APInt> Vectorizer::getConstantOffsetSelects(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  if (Depth++ == MaxDepth)
    return std::nullopt;

  if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
    if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
      if (SelectA->getCondition() != SelectB->getCondition())
        return std::nullopt;
      LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetSelects, PtrA=" << *PtrA
                        << ", PtrB=" << *PtrB << ", ContextInst="
                        << *ContextInst << ", Depth=" << Depth << "\n");
      std::optional<APInt> TrueDiff = getConstantOffset(
          SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst, Depth);
      if (!TrueDiff.has_value())
        return std::nullopt;
      std::optional<APInt> FalseDiff =
          getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
                            ContextInst, Depth);
      if (TrueDiff == FalseDiff)
        return TrueDiff;
    }
  }
  return std::nullopt;
}
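// Illustrative sketch (not from the original source): for
//
//   %pa = select i1 %c, ptr %x, ptr %y
//   %pb = select i1 %c, ptr %x4, ptr %y4   ; %x4 = %x + 4, %y4 = %y + 4
//
// both arms differ by the same constant 4, so PtrB - PtrA = 4 regardless of
// which way %c resolves.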
EquivalenceClassMap
Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
                                      BasicBlock::iterator End) {
  EquivalenceClassMap Ret;

  auto GetUnderlyingObject = [](const Value *Ptr) -> const Value * {
    const Value *ObjPtr = llvm::getUnderlyingObject(Ptr);
    if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
      // Two selects are distinct instructions even if they share the same
      // condition and evaluate to consecutive pointers for their true and
      // false values.  Grouping on the selects themselves would put
      // consecutive accesses into different classes, so they would never be
      // checked for being consecutive; group on the condition instead.
      return Sel->getCondition();
    }
    return ObjPtr;
  };

  for (Instruction &I : make_range(Begin, End)) {
    auto *LI = dyn_cast<LoadInst>(&I);
    auto *SI = dyn_cast<StoreInst>(&I);
    if (!LI && !SI)
      continue;

    if ((LI && !LI->isSimple()) || (SI && !SI->isSimple()))
      continue;

    if ((LI && !TTI.isLegalToVectorizeLoad(LI)) ||
        (SI && !TTI.isLegalToVectorizeStore(SI)))
      continue;

    Type *Ty = getLoadStoreType(&I);
    if (!VectorType::isValidElementType(Ty->getScalarType()))
      continue;

    // Skip weird non-byte sizes.  They probably aren't worth the effort of
    // handling correctly.
    unsigned TySize = DL.getTypeSizeInBits(Ty);
    if ((TySize % 8) != 0)
      continue;

    // Skip vectors of pointers; the vectorized load/store uses an integer
    // type, and casting between it and a vector of pointers (e.g. bitcasting
    // <4 x float*> to i512) is not supported.
    if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
      continue;

    Value *Ptr = getLoadStorePointerOperand(&I);
    unsigned AS = Ptr->getType()->getPointerAddressSpace();
    unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);

    unsigned VF = VecRegSize / TySize;
    VectorType *VecTy = dyn_cast<VectorType>(Ty);

    // Only handle power-of-two sized elements.
    if ((!VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(Ty))) ||
        (VecTy &&
         !isPowerOf2_32(DL.getTypeSizeInBits(VecTy->getScalarType()))))
      continue;

    // No point in looking at these if they're too big to vectorize.
    if (TySize > VecRegSize / 2 ||
        (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
      continue;

    Ret[{GetUnderlyingObject(Ptr), AS,
         DL.getTypeSizeInBits(getLoadStoreType(&I)->getScalarType()),
         /*IsLoad=*/LI != nullptr}]
        .push_back(&I);
  }

  return Ret;
}
std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
  if (Instrs.empty())
    return {};

  unsigned AS = getLoadStoreAddressSpace(Instrs[0]);
  unsigned ASPtrBits = DL.getIndexSizeInBits(AS);

#ifndef NDEBUG
  // Check that Instrs is in BB order.
  for (size_t I = 1; I < Instrs.size(); ++I)
    assert(Instrs[I - 1]->comesBefore(Instrs[I]));
#endif
  // Machinery to build an MRU-hashtable of Chains.
  //
  // (Ideally this could be done with MapVector, but as currently implemented,
  // moving an element to the front of a MapVector is O(n).)
  struct InstrListElem : ilist_node<InstrListElem>,
                         std::pair<Instruction *, Chain> {
    explicit InstrListElem(Instruction *I)
        : std::pair<Instruction *, Chain>(I, {}) {}
  };
  struct InstrListElemDenseMapInfo {
    using PtrInfo = DenseMapInfo<InstrListElem *>;
    using IInfo = DenseMapInfo<Instruction *>;
    static InstrListElem *getEmptyKey() { return PtrInfo::getEmptyKey(); }
    static InstrListElem *getTombstoneKey() {
      return PtrInfo::getTombstoneKey();
    }
    static unsigned getHashValue(const InstrListElem *E) {
      return IInfo::getHashValue(E->first);
    }
    static bool isEqual(const InstrListElem *A, const InstrListElem *B) {
      if (A == getEmptyKey() || B == getEmptyKey())
        return A == getEmptyKey() && B == getEmptyKey();
      if (A == getTombstoneKey() || B == getTombstoneKey())
        return A == getTombstoneKey() && B == getTombstoneKey();
      return IInfo::isEqual(A->first, B->first);
    }
  };
  SpecificBumpPtrAllocator<InstrListElem> Allocator;
  simple_ilist<InstrListElem> MRU;
  DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;
  // Compare each instruction in `Instrs` to the leaders of the N most
  // recently-used chains.  This limits the O(n^2) behavior of this pass while
  // also allowing us to build arbitrarily long chains.
  for (Instruction *I : Instrs) {
    constexpr int MaxChainsToTry = 64;

    bool MatchFound = false;
    auto ChainIter = MRU.begin();
    for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();
         ++J, ++ChainIter) {
      std::optional<APInt> Offset = getConstantOffset(
          getLoadStorePointerOperand(ChainIter->first),
          getLoadStorePointerOperand(I),
          /*ContextInst=*/
          (ChainIter->first->comesBefore(I) ? I : ChainIter->first));
      if (Offset.has_value()) {
        // `Offset` might not have the expected number of bits if e.g. AS has
        // a different number of bits than opaque pointers.
        ChainIter->second.push_back(ChainElem{I, Offset.value()});
        // Move ChainIter to the front of the MRU list.
        MRU.remove(*ChainIter);
        MRU.push_front(*ChainIter);
        MatchFound = true;
        break;
      }
    }

    if (!MatchFound) {
      APInt ZeroOffset(ASPtrBits, 0);
      InstrListElem *E = new (Allocator.Allocate()) InstrListElem(I);
      E->second.push_back(ChainElem{I, ZeroOffset});
      MRU.push_front(*E);
      Chains.insert(E);
    }
  }

  std::vector<Chain> Ret;
  Ret.reserve(Chains.size());
  // Iterate over MRU rather than Chains so the order is deterministic.
  for (auto &E : MRU)
    if (E.second.size() > 1)
      Ret.push_back(std::move(E.second));
  return Ret;
}
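// Illustrative sketch (not from the original source): with MaxChainsToTry =
// 64, each instruction is compared against the leaders of at most the 64 most
// recently extended chains; an access stream interleaving far more distinct
// objects than that simply starts fresh chains instead of searching them all.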
std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
                                                   Instruction *ContextInst,
                                                   unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
                    << ", PtrB=" << *PtrB << ", ContextInst= " << *ContextInst
                    << ", Depth=" << Depth << "\n");
  // We'll ultimately return a value of this bit width, even if computations
  // happen in a different width.
  unsigned OrigBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
  APInt OffsetA(OrigBitWidth, 0);
  APInt OffsetB(OrigBitWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
  unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
  if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
    return std::nullopt;

  // If we have to shrink the pointer, stripAndAccumulateInBoundsConstantOffsets
  // should have returned the original pointers, with zero offsets.
  assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
         OffsetB.getSignificantBits() <= NewPtrBitWidth);

  OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
  OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
  if (PtrA == PtrB)
    return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);

  // Try to compute PtrB - PtrA with SCEV.
  const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
  if (DistScev != SE.getCouldNotCompute()) {
    LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
    ConstantRange DistRange = SE.getSignedRange(DistScev);
    if (DistRange.isSingleElement()) {
      // Handle index width (the width of Dist) != pointer width (the width of
      // the Offset*s at this point).
      APInt Dist = DistRange.getSingleElement()->sextOrTrunc(NewPtrBitWidth);
      return (OffsetB - OffsetA + Dist).sextOrTrunc(OrigBitWidth);
    }
  }
  std::optional<APInt> Diff =
      getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth);
  if (Diff.has_value())
    return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
        .sextOrTrunc(OrigBitWidth);
  return std::nullopt;
}
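// Illustrative sketch (not from the original source): for
//
//   %pa = getelementptr inbounds i8, ptr %base, i64 4
//   %pb = getelementptr inbounds i8, ptr %base, i64 12
//
// stripAndAccumulateInBoundsConstantOffsets reduces both pointers to %base
// with OffsetA = 4 and OffsetB = 12, so the result is OffsetB - OffsetA = 8.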