#include <type_traits>

#define DEBUG_TYPE "load-store-vectorizer"

STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");
using EqClassKey =
    std::tuple<const Value * /* underlying object */, unsigned /* AddrSpace */,
               unsigned /* element size in bits */,
               char /* IsLoad; char because bool can't be a DenseMap key */>;

[[maybe_unused]] llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                                               const EqClassKey &K) {
  const auto &[UnderlyingObject, AddrSpace, ElementSize, IsLoad] = K;
  OS << (IsLoad ? "load" : "store") << " of " << *UnderlyingObject
     << " of element size " << ElementSize << " bits in addrspace "
     << AddrSpace;
  return OS;
}
struct ChainElem {
  Instruction *Inst;
  APInt OffsetFromLeader;
  ChainElem(Instruction *Inst, APInt OffsetFromLeader)
      : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
};
using Chain = SmallVector<ChainElem, 1>;
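// Illustrative example (not from this file): the accesses
//   %l0 = load i32, ptr %p          ; leader, OffsetFromLeader = 0
//   %l1 = load i32, ptr %p.plus.4   ; OffsetFromLeader = 4
//   %l2 = load i32, ptr %p.plus.8   ; OffsetFromLeader = 8
// form a chain of three ChainElems, each holding the instruction and its byte
// offset relative to the chain's leader %l0.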
void sortChainInBBOrder(Chain &C) {
  sort(C, [](auto &A, auto &B) { return A.Inst->comesBefore(B.Inst); });
}
void sortChainInOffsetOrder(Chain &C) {
  sort(C, [](const auto &A, const auto &B) {
    if (A.OffsetFromLeader != B.OffsetFromLeader)
      return A.OffsetFromLeader.slt(B.OffsetFromLeader);
    return A.Inst->comesBefore(B.Inst);
  });
}
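// For example, elements at offsets {8, 0, 4} sort to {0, 4, 8}; when two
// elements share an offset (overlapping accesses), basic-block order breaks
// the tie, keeping the sort deterministic.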
[[maybe_unused]] void dumpChain(ArrayRef<ChainElem> C) {
  for (const auto &E : C) {
    dbgs() << "  " << *E.Inst << " (offset " << E.OffsetFromLeader << ")\n";
  }
}
using EquivalenceClassMap =
    MapVector<EqClassKey, SmallVector<Instruction *, 8>>;

constexpr unsigned StackAdjustedAlignment = 4;
void propagateMetadata(Instruction *I, const Chain &C) {
  SmallVector<Value *, 8> Values;
  for (const ChainElem &E : C)
    Values.emplace_back(E.Inst);
  propagateMetadata(I, Values);
}

bool isInvariantLoad(const Instruction *I) {
  const LoadInst *LI = dyn_cast<LoadInst>(I);
  return LI != nullptr && LI->hasMetadata(LLVMContext::MD_invariant_load);
}
/// Reorders the instructions that I depends on (the instructions defining its
/// operands) to ensure they dominate I.
void reorder(Instruction *I) {
  SmallPtrSet<Instruction *, 16> InstructionsToMove;
  SmallVector<Instruction *, 16> Worklist;

  Worklist.emplace_back(I);
  while (!Worklist.empty()) {
    Instruction *IW = Worklist.pop_back_val();
    int NumOperands = IW->getNumOperands();
    for (int Idx = 0; Idx < NumOperands; Idx++) {
      Instruction *IM = dyn_cast<Instruction>(IW->getOperand(Idx));
      if (!IM || IM->getOpcode() == Instruction::PHI)
        continue;

      // If IM is in another BB, no need to move it, because this pass only
      // vectorizes instructions within one BB.
      if (IM->getParent() != I->getParent())
        continue;

      assert(IM != I && "Unexpected cycle while re-ordering instructions");

      if (!IM->comesBefore(I)) {
        InstructionsToMove.insert(IM);
        Worklist.emplace_back(IM);
      }
    }
  }

  for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;) {
    Instruction *IM = &*(BBI++);
    if (!InstructionsToMove.contains(IM))
      continue;
    IM->moveBefore(I->getIterator());
  }
}
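// Illustrative example (assumed IR, not from this file): when a load is later
// hoisted up to the head of its chain, an address computation defined between
// the two loads, e.g.
//   %gep = getelementptr i8, ptr %p, i64 4
// must itself be moved above the insertion point first; the worklist above
// collects such operand-defining instructions transitively and moves them
// before I.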
class Vectorizer {
  Function &F;
  AliasAnalysis &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
  ScalarEvolution &SE;
  TargetTransformInfo &TTI;
  const DataLayout &DL;
  IRBuilder<> Builder;

  SmallVector<Instruction *, 128> ToErase;

  // Extra loads/stores the vectorizer created itself to fill gaps in or
  // extend chains; any that never make it into a vectorized chain are
  // deleted again by deleteExtraElements().
  DenseSet<Instruction *> ExtraElements;

public:
  Vectorizer(Function &F, AliasAnalysis &AA, AssumptionCache &AC,
             DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
      : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
        DL(F.getDataLayout()), Builder(SE.getContext()) {}
  bool run();

private:
  static const unsigned MaxDepth = 3;

  bool runOnPseudoBB(BasicBlock::iterator Begin, BasicBlock::iterator End);
  bool runOnEquivalenceClass(const EqClassKey &EqClassKey,
                             ArrayRef<Instruction *> EqClass);
  bool runOnChain(Chain &C);

  /// Splits the chain into subchains of instructions which read/write a
  /// contiguous block of memory.
  std::vector<Chain> splitChainByContiguity(Chain &C);

  /// Splits the chain into subchains where it's safe to hoist loads up to the
  /// beginning of the sub-chain and to sink stores down to its end.
  std::vector<Chain> splitChainByMayAliasInstrs(Chain &C);

  /// Splits the chain into subchains that make legal, aligned accesses.
  std::vector<Chain> splitChainByAlignment(Chain &C);

  bool vectorizeChain(Chain &C);

  /// Tries to compute the offset in bytes PtrB - PtrA.
  std::optional<APInt> getConstantOffset(Value *PtrA, Value *PtrB,
                                         Instruction *ContextInst,
                                         unsigned Depth = 0);
  std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
                                                     Instruction *ContextInst,
                                                     unsigned Depth);
  std::optional<APInt> getConstantOffsetSelects(Value *PtrA, Value *PtrB,
                                                Instruction *ContextInst,
                                                unsigned Depth);

  /// Gets the element type of the vector that the chain will load or store.
  Type *getChainElemTy(const Chain &C);

  /// Can ChainElem be moved up (if IsLoadChain) or down (if !IsLoadChain) to
  /// ChainBegin without changing the function's semantics?
  template <bool IsLoadChain>
  bool isSafeToMove(
      Instruction *ChainElem, Instruction *ChainBegin,
      const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
      BatchAAResults &BatchAA);

  /// Merges equivalence classes whose underlying objects differ by one level
  /// of indirection.
  void mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const;

  /// Is a load/store of this size and alignment allowed by TTI, and at least
  /// as fast as an unvectorized access?
  bool accessIsAllowedAndFast(unsigned SizeBytes, unsigned AS, Align Alignment,
                              unsigned VecElemBits) const;

  /// Creates an extra load or store Offset bytes after PrevElem, used to fill
  /// gaps in or extend a chain.
  ChainElem createExtraElementAfter(const ChainElem &PrevElem, Type *Ty,
                                    APInt Offset, StringRef Prefix,
                                    Align Alignment = Align());

  // Mask-building helper used when a chain with extra elements is emitted as
  // a masked load/store. (The leading part of this declaration is elided in
  // this excerpt; its final parameter is `FixedVectorType *VecTy`.)

  /// Deletes extra elements that were created but never vectorized.
  void deleteExtraElements();
};
class LoadStoreVectorizerLegacyPass : public FunctionPass {
public:
  static char ID;

  LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {
    initializeLoadStoreVectorizerLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "GPU Load and Store Vectorizer";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // ... (requires AA, AssumptionCache, ScalarEvolution, DominatorTree, and
    // TTI; preserves the CFG)
  }
};

char LoadStoreVectorizerLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                      "Vectorize load and store instructions", false, false)
// ... (INITIALIZE_PASS_DEPENDENCY lines elided in this excerpt)
INITIALIZE_PASS_END(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                    "Vectorize load and store instructions", false, false)

Pass *llvm::createLoadStoreVectorizerPass() {
  return new LoadStoreVectorizerLegacyPass();
}
bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
    return false;

  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  return Vectorizer(F, AA, AC, DT, SE, TTI).run();
}
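// How the pass is typically invoked (a sketch of standard LLVM usage, not
// part of this file):
//
//   opt -passes=load-store-vectorizer in.ll -S -o out.ll
//
// GPU targets such as AMDGPU and NVPTX also schedule this pass in their
// standard codegen pipelines, which is where it usually runs in practice.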
// (From LoadStoreVectorizerPass::run, the new pass manager entry point:
// don't vectorize when the attribute NoImplicitFloat is used.)
  if (F.hasFnAttribute(Attribute::NoImplicitFloat))
    return PreservedAnalyses::all();
bool Vectorizer::run() {
  bool Changed = false;

  // Break up each basic block at "barrier" instructions (those that may not
  // transfer execution to their successor) and run on each pseudo-BB between
  // consecutive barriers.
  // ...
  for (auto It = Barriers.begin(), End = std::prev(Barriers.end()); It != End;
       ++It)
    Changed |= runOnPseudoBB(*It, *std::next(It));

  // Erase the scalar accesses that were replaced by vector ones.
  for (Instruction *I : ToErase) {
    // ...
    if (I->use_empty())
      I->eraseFromParent();
    // ...
  }

  // Clean up any extra elements that were created but never vectorized.
  deleteExtraElements();
  return Changed;
}
bool Vectorizer::runOnPseudoBB(BasicBlock::iterator Begin,
                               BasicBlock::iterator End) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on pseudo-BB [" << *Begin << " ... ";
    if (End != Begin->getParent()->end())
      dbgs() << *End;
    else
      dbgs() << "<BB end>";
    dbgs() << ")\n";
  });

  bool Changed = false;
  for (const auto &[EqClassKey, EqClass] :
       collectEquivalenceClasses(Begin, End))
    Changed |= runOnEquivalenceClass(EqClassKey, EqClass);
  return Changed;
}
bool Vectorizer::runOnEquivalenceClass(const EqClassKey &EqClassKey,
                                       ArrayRef<Instruction *> EqClass) {
  bool Changed = false;

  LLVM_DEBUG({
    dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
           << " keyed on " << EqClassKey << ":\n";
    for (Instruction *I : EqClass)
      dbgs() << "  " << *I << "\n";
  });

  std::vector<Chain> Chains = gatherChains(EqClass);
  LLVM_DEBUG(dbgs() << "LSV: Got " << Chains.size()
                    << " nontrivial chains.\n";);
  for (Chain &C : Chains)
    Changed |= runOnChain(C);
  return Changed;
}
bool Vectorizer::runOnChain(Chain &C) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on chain with " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  // Split up the chain into increasingly smaller chains, until we can finally
  // vectorize them.  Each split is at worst O(n log n), and splitting does
  // not change the number of instructions, so the whole nest stays O(n log n).
  bool Changed = false;
  for (auto &C : splitChainByMayAliasInstrs(C))
    for (auto &C : splitChainByContiguity(C))
      for (auto &C : splitChainByAlignment(C))
        Changed |= vectorizeChain(C);
  return Changed;
}
std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
  if (C.empty())
    return {};

  sortChainInBBOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByMayAliasInstrs considering chain:\n";
    dumpChain(C);
  });

  // Elements of the chain with non-overlapping offsets can't alias each
  // other, but AA may not be smart enough to figure this out.  Record the
  // offsets so isSafeToMove can consult them cheaply.
  DenseMap<Instruction *, APInt /*OffsetFromLeader*/> ChainOffsets;
  for (const auto &E : C)
    ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});

  // The IR does not change while this function runs, so a batched alias
  // analysis is safe and reduces compile time.
  BatchAAResults BatchAA(AA);

  // Loads get hoisted up to the first load in the chain; stores get sunk down
  // to the last store.  Walk the chain and start a new sub-chain whenever an
  // intervening may-alias instruction is found.  IsLoad is a
  // std::bool_constant so the direction can be chosen with if constexpr.
  auto Impl = [&](auto IsLoad) {
    auto [ChainBegin, ChainEnd] = [&](auto IsLoad) {
      if constexpr (IsLoad())
        return std::make_pair(C.begin(), C.end());
      else
        return std::make_pair(C.rbegin(), C.rend());
    }(IsLoad);
    assert(ChainBegin != ChainEnd);

    std::vector<Chain> Chains;
    SmallVector<ChainElem, 1> NewChain;
    NewChain.emplace_back(*ChainBegin);
    for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd;
         ++ChainIt) {
      if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst,
                               ChainOffsets, BatchAA)) {
        LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge "
                          << *ChainIt->Inst << " into " << *ChainBegin->Inst
                          << "\n");
        NewChain.emplace_back(*ChainIt);
      } else {
        LLVM_DEBUG(
            dbgs() << "LSV: Found intervening may-alias instrs; cannot merge "
                   << *ChainIt->Inst << " into " << *ChainBegin->Inst << "\n");
        if (NewChain.size() > 1) {
          LLVM_DEBUG({
            dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
            dumpChain(NewChain);
          });
          Chains.emplace_back(std::move(NewChain));
        }
        // Start a new chain.
        NewChain = SmallVector<ChainElem, 1>({*ChainIt});
      }
    }
    if (NewChain.size() > 1) {
      LLVM_DEBUG({
        dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
        dumpChain(NewChain);
      });
      Chains.emplace_back(std::move(NewChain));
    }
    return Chains;
  };

  if (isa<LoadInst>(C[0].Inst))
    return Impl(std::bool_constant<true>());
  assert(isa<StoreInst>(C[0].Inst));
  return Impl(std::bool_constant<false>());
}
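// Direction matters here: loads are hoisted up to the first load, so the scan
// for may-alias instructions walks forward from the chain head; stores are
// sunk down to the last store, so the same code runs over the reverse
// iterators. Illustrative example (assumed IR):
//   %l0 = load i32, ptr %p
//   store i32 0, ptr %q        ; may alias %p.plus.4
//   %l1 = load i32, ptr %p.plus.4
// %l1 joins %l0's chain only if AA can prove the intervening store does not
// alias it; otherwise the chain is split at the store.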
std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByContiguity considering chain:\n";
    dumpChain(C);
  });

  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned MaxVecRegBits = TTI.getLoadStoreVecRegBitWidth(AS);
  Type *ElementType = getLoadStoreType(C[0].Inst)->getScalarType();

  // Gap filling requires the target to support a masked load/store of the
  // widest vector we might optimistically form.
  Align OptimisticAlign = Align(MaxVecRegBits / 8);
  unsigned int MaxVectorNumElems =
      MaxVecRegBits / DL.getTypeSizeInBits(ElementType);
  FixedVectorType *OptimisticVectorType =
      FixedVectorType::get(ElementType, MaxVectorNumElems);
  bool TryFillGaps = /* masked load/store legality for OptimisticVectorType,
                        check elided in this excerpt */;

  // Track the best-aligned element of the chain, so the alignment of any
  // extra elements we create can be derived from it.
  Align BestAlignedElemAlign = getLoadStoreAlignment(C[0].Inst);
  APInt OffsetOfBestAlignedElemFromLeader = C[0].OffsetFromLeader;
  for (const auto &E : C) {
    Align ElementAlignment = getLoadStoreAlignment(E.Inst);
    if (ElementAlignment > BestAlignedElemAlign) {
      BestAlignedElemAlign = ElementAlignment;
      OffsetOfBestAlignedElemFromLeader = E.OffsetFromLeader;
    }
  }
  auto DeriveAlignFromBestAlignedElem = [&](APInt NewElemOffsetFromLeader) {
    return commonAlignment(
        BestAlignedElemAlign,
        (NewElemOffsetFromLeader - OffsetOfBestAlignedElemFromLeader)
            .abs()
            .getZExtValue());
  };

  unsigned ASPtrBits = DL.getIndexSizeInBits(AS);

  std::vector<Chain> Ret;
  Ret.push_back({C.front()});

  unsigned ChainElemTyBits = DL.getTypeSizeInBits(getChainElemTy(C));
  ChainElem &Prev = C[0];
  for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
    auto &CurChain = Ret.back();
    unsigned PrevSzBytes = DL.getTypeStoreSize(getLoadStoreType(Prev.Inst));
    APInt PrevReadEnd = Prev.OffsetFromLeader + PrevSzBytes;

    unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(It->Inst));
    assert(8 * SzBytes % ChainElemTyBits == 0 &&
           "Every chain-element size must be a multiple of the element size "
           "after vectorization.");
    APInt ReadEnd = It->OffsetFromLeader + SzBytes;

    // `It` is contiguous with the chain so far if it starts at or before the
    // previous read end and any overlap is a whole number of chain elements.
    bool AreContiguous = false;
    if (It->OffsetFromLeader.sle(PrevReadEnd)) {
      uint64_t Overlap = (PrevReadEnd - It->OffsetFromLeader).getZExtValue();
      if (8 * Overlap % ChainElemTyBits == 0)
        AreContiguous = true;
    }

    LLVM_DEBUG(dbgs() << "LSV: Chain element is "
                      << (AreContiguous ? "contiguous" : "chain-breaker")
                      << " " << *It->Inst << " (starts at offset "
                      << It->OffsetFromLeader << ")\n");

    // If the elements are not contiguous, try to fill small gaps with extra
    // elements; they are masked off again when the chain is vectorized.
    bool GapFilled = false;
    if (!AreContiguous && TryFillGaps && PrevSzBytes == SzBytes) {
      APInt GapSzBytes = It->OffsetFromLeader - PrevReadEnd;
      // A one-element gap: create one extra element right after Prev.
      if (GapSzBytes == PrevSzBytes) {
        ChainElem NewElem = createExtraElementAfter(
            Prev, /* Ty, Offset, and Prefix args elided */
            DeriveAlignFromBestAlignedElem(PrevReadEnd));
        CurChain.push_back(NewElem);
        GapFilled = true;
      }
      // A two-element gap: only fill it if that keeps the chain length at a
      // vectorization-friendly count (CurChain.size() % 4 == 1).
      if ((GapSzBytes == 2 * PrevSzBytes) && (CurChain.size() % 4 == 1)) {
        ChainElem NewElem1 = createExtraElementAfter(
            Prev, /* Ty, Offset, and Prefix args elided */
            DeriveAlignFromBestAlignedElem(PrevReadEnd));
        ChainElem NewElem2 = createExtraElementAfter(
            NewElem1, /* Ty, Offset, and Prefix args elided */
            DeriveAlignFromBestAlignedElem(PrevReadEnd + PrevSzBytes));
        CurChain.push_back(NewElem1);
        CurChain.push_back(NewElem2);
        GapFilled = true;
      }
    }

    if (AreContiguous || GapFilled)
      CurChain.push_back(*It);
    else
      Ret.push_back({*It});

    // Prev should track the element whose read reaches furthest.
    if (ReadEnd.sge(PrevReadEnd))
      Prev = *It;
  }

  // Filter out length-1 chains, these are uninteresting.
  llvm::erase_if(Ret, [](const auto &Chain) { return Chain.size() <= 1; });
  return Ret;
}
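// Worked example (illustrative): four i32 loads at offsets 0, 4, 12, and 16.
// The element at offset 12 starts past PrevReadEnd (= 8), so the chain would
// normally break there. With TryFillGaps and equal element sizes
// (GapSzBytes == 4 == PrevSzBytes), one extra load is created at offset 8 and
// masked off during vectorization, leaving a single contiguous chain covering
// offsets 0 through 20.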
Type *Vectorizer::getChainElemTy(const Chain &C) {
  assert(!C.empty());
  // If any element is a pointer, vectorize the chain as integers, since
  // ptrtoint/inttoptr give more freedom than bitcasting vectors of pointers.
  if (any_of(C, [](const ChainElem &E) {
        return getLoadStoreType(E.Inst)->getScalarType()->isPointerTy();
      })) {
    return Type::getIntNTy(
        F.getContext(),
        DL.getTypeSizeInBits(getLoadStoreType(C[0].Inst)->getScalarType()));
  }

  // Otherwise use the scalar type of the first element, falling back to an
  // equivalent integer type if the elements disagree.
  Type *T = getLoadStoreType(C[0].Inst)->getScalarType();
  for (const ChainElem &E : C)
    if (getLoadStoreType(E.Inst)->getScalarType() != T)
      return Type::getIntNTy(F.getContext(), DL.getTypeSizeInBits(T));
  return T;
}
std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
  // Greedy algorithm: starting from each element, find the longest candidate
  // sub-chain that fits in a vector register and that the target accepts,
  // preferring longer candidates.
  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByAlignment considering chain:\n";
    dumpChain(C);
  });

  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  auto GetVectorFactor = [&](unsigned VF, unsigned LoadStoreSize,
                             unsigned ChainSizeBytes, VectorType *VecTy) {
    return IsLoadChain ? TTI.getLoadVectorFactor(VF, LoadStoreSize,
                                                 ChainSizeBytes, VecTy)
                       : TTI.getStoreVectorFactor(VF, LoadStoreSize,
                                                  ChainSizeBytes, VecTy);
  };

#ifndef NDEBUG
  for (const auto &E : C) {
    Type *Ty = getLoadStoreType(E.Inst)->getScalarType();
    assert(isPowerOf2_32(DL.getTypeSizeInBits(Ty)) &&
           "Should have filtered out non-power-of-two elements in "
           "collectEquivalenceClasses.");
  }
#endif

  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned VecRegBytes = TTI.getLoadStoreVecRegBitWidth(AS) / 8;

  // Remember whether the chain may contain extra (gap-filling) loads/stores;
  // such candidates can only be emitted as masked loads/stores.
  bool CandidateChainsMayContainExtraLoadsStores =
      any_of(C, [this](const ChainElem &E) {
        return ExtraElements.contains(E.Inst);
      });
  std::vector<Chain> Ret;
  for (unsigned CBegin = 0; CBegin < C.size(); ++CBegin) {
    // Find candidate chains of size not greater than the largest vector reg.
    // These chains are over the closed interval [CBegin, CEnd].
    SmallVector<std::pair<unsigned /*CEnd*/, unsigned /*SizeBytes*/>, 8>
        CandidateChains;
    unsigned Sz = DL.getTypeStoreSize(getLoadStoreType(C[CBegin].Inst));
    APInt PrevReadEnd = C[CBegin].OffsetFromLeader + Sz;
    for (unsigned CEnd = CBegin + 1, Size = C.size(); CEnd < Size; ++CEnd) {
      APInt ReadEnd = C[CEnd].OffsetFromLeader +
                      DL.getTypeStoreSize(getLoadStoreType(C[CEnd].Inst));
      // Overlapping elements only contribute the bytes that extend past the
      // furthest read end seen so far.
      unsigned BytesAdded =
          PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue()
                                   : 0;
      Sz += BytesAdded;
      if (Sz > VecRegBytes)
        break;
      CandidateChains.emplace_back(CEnd, Sz);
      PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
    }
    // Consider the longest candidate first; accept the first one that works.
    for (auto It = CandidateChains.rbegin(), End = CandidateChains.rend();
         It != End; ++It) {
      auto [CEnd, SizeBytes] = *It;
      LLVM_DEBUG(
          dbgs() << "LSV: splitChainByAlignment considering candidate chain ["
                 << *C[CBegin].Inst << " ... " << *C[CEnd].Inst << "]\n");

      Type *VecElemTy = getChainElemTy(C);
      // Note, VecElemTy is a power of 2, but might be less than one byte.
      // For example, we can vectorize 2 x <2 x i4> to <4 x i4>, and in this
      // case VecElemTy would be i4.
      unsigned VecElemBits = DL.getTypeSizeInBits(VecElemTy);

      // SizeBytes and VecElemBits are powers of 2, so they divide evenly.
      assert((8 * SizeBytes) % VecElemBits == 0);
      unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
      FixedVectorType *VecTy = FixedVectorType::get(VecElemTy, NumVecElems);

      unsigned VF = 8 * VecRegBytes / VecElemBits;

      // Check that TTI is happy with this vectorization factor.
      unsigned TargetVF = GetVectorFactor(
          VF, VecElemBits, VecElemBits * NumVecElems / 8, VecTy);
      if (TargetVF != VF && TargetVF < NumVecElems) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because TargetVF="
                   << TargetVF << " != VF=" << VF
                   << " and TargetVF < NumVecElems=" << NumVecElems << "\n");
        continue;
      }
      // If we're loading/storing from an alloca, try to raise the alloca's
      // alignment when that makes the access legal and fast.
      Align Alignment = getLoadStoreAlignment(C[CBegin].Inst);
      Value *PtrOperand = getLoadStorePointerOperand(C[CBegin].Inst);
      bool IsAllocaAccess = AS == DL.getAllocaAddrSpace() &&
                            isa<AllocaInst>(PtrOperand->stripPointerCasts());
      Align PrefAlign = Align(StackAdjustedAlignment);
      if (IsAllocaAccess && Alignment.value() % SizeBytes != 0 &&
          accessIsAllowedAndFast(SizeBytes, AS, PrefAlign, VecElemBits)) {
        Align NewAlign = getOrEnforceKnownAlignment(
            PtrOperand, PrefAlign, DL, C[CBegin].Inst, nullptr, &DT);
        if (NewAlign >= Alignment) {
          LLVM_DEBUG(dbgs()
                     << "LSV: splitByChain upgrading alloca alignment from "
                     << Alignment.value() << " to " << NewAlign.value()
                     << "\n");
          Alignment = NewAlign;
        }
      }
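      // Example of the vectorization-factor arithmetic above (illustrative):
      // with 128-bit vector registers and i8 elements, VF = 8 * 16 / 8 = 16.
      // For a candidate of 8 x i8, NumVecElems = 8; if the target clamps
      // TargetVF to 4, then TargetVF != VF and TargetVF < NumVecElems, so
      // that candidate is discarded and a shorter one is tried next.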
      // If the access is not allowed at its current size and alignment, try
      // to extend the chain with extra elements up to the next power of two,
      // so it can be emitted as a single (masked) power-of-2-sized access.
      Chain ExtendingLoadsStores;
      if (!accessIsAllowedAndFast(SizeBytes, AS, Alignment, VecElemBits)) {
        bool AllowedAndFast = false;
        {
          assert(VecElemBits % 8 == 0);
          unsigned VecElemBytes = VecElemBits / 8;
          unsigned NewNumVecElems = PowerOf2Ceil(NumVecElems);
          unsigned NewSizeBytes = VecElemBytes * NewNumVecElems;
          assert(isPowerOf2_32(TargetVF) &&
                 "TargetVF expected to be a power of 2");
          assert(NewNumVecElems <= TargetVF &&
                 "Should not extend past TargetVF");
          LLVM_DEBUG(dbgs()
                     << "LSV: attempting to extend chain of " << NumVecElems
                     << " " << (IsLoadChain ? "loads" : "stores") << " to "
                     << NewNumVecElems << " elements\n");
          bool IsLegalToExtend =
              /* masked load/store legality check, elided in this excerpt */;
          if (IsLegalToExtend &&
              accessIsAllowedAndFast(NewSizeBytes, AS, Alignment,
                                     VecElemBits)) {
            LLVM_DEBUG(dbgs()
                       << "LSV: extending " << (IsLoadChain ? "load" : "store")
                       << " chain of " << NumVecElems << " "
                       << (IsLoadChain ? "loads" : "stores")
                       << " with total byte size of " << SizeBytes << " to "
                       << NewNumVecElems << " "
                       << (IsLoadChain ? "loads" : "stores")
                       << " with total byte size of " << NewSizeBytes
                       << ", TargetVF=" << TargetVF << "\n");

            // Create the extra elements immediately past the candidate chain.
            unsigned ASPtrBits = DL.getIndexSizeInBits(AS);
            for (unsigned I = 0; I < (NewNumVecElems - NumVecElems); I++) {
              ChainElem NewElem = createExtraElementAfter(
                  C[CBegin], VecElemTy,
                  APInt(ASPtrBits, SizeBytes + I * VecElemBytes), "Extend");
              ExtendingLoadsStores.push_back(NewElem);
            }
            SizeBytes = NewSizeBytes;
            NumVecElems = NewNumVecElems;
            AllowedAndFast = true;
          }
        }
        if (!AllowedAndFast) {
          LLVM_DEBUG(
              dbgs()
              << "LSV: splitChainByAlignment discarding candidate chain "
                 "because its alignment is not AllowedAndFast: "
              << Alignment.value() << "\n");
          continue;
        }
      }

      if ((IsLoadChain &&
           !TTI.isLegalToVectorizeLoadChain(SizeBytes, Alignment, AS)) ||
          (!IsLoadChain &&
           !TTI.isLegalToVectorizeStoreChain(SizeBytes, Alignment, AS))) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because !isLegalToVectorizeLoad/StoreChain.");
        continue;
      }
      // A candidate that contains extra (gap-filling) loads/stores can only
      // be emitted as a masked load/store, which the target must support.
      if (CandidateChainsMayContainExtraLoadsStores) {
        bool CurrCandContainsExtraLoadsStores = any_of(
            make_range(C.begin() + CBegin, C.begin() + CEnd + 1),
            [this](const ChainElem &E) {
              return ExtraElements.contains(E.Inst);
            });
        // CanUseMaskedAccess is a stand-in name; the TTI legality check is
        // elided in this excerpt.
        bool CanUseMaskedAccess = /* ... */;
        if (CurrCandContainsExtraLoadsStores && !CanUseMaskedAccess) {
          LLVM_DEBUG(
              dbgs()
              << "LSV: splitChainByAlignment discarding candidate chain "
                 "because it contains extra loads/stores that we cannot "
                 "legally vectorize into a masked load/store\n");
          continue;
        }
      }
      // Hooray, we can vectorize this chain!
      Chain &NewChain = Ret.emplace_back();
      for (unsigned I = CBegin; I <= CEnd; ++I)
        NewChain.emplace_back(C[I]);
      for (ChainElem E : ExtendingLoadsStores)
        NewChain.emplace_back(E);
      CBegin = CEnd; // Skip over the instructions we just handled.
      break;
    }
  }
  return Ret;
}
bool Vectorizer::vectorizeChain(Chain &C) {
  if (C.size() < 2)
    return false;

  // A two-element chain where one element is an extra (gap-filling) access
  // would not widen anything; skip it.
  bool ChainContainsExtraLoadsStores =
      any_of(C, [this](const ChainElem &E) {
        return ExtraElements.contains(E.Inst);
      });
  if (C.size() == 2 && ChainContainsExtraLoadsStores)
    return false;

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: Vectorizing chain of " << C.size() << " instructions:\n";
    dumpChain(C);
  });
  Type *VecElemTy = getChainElemTy(C);
  bool IsLoadChain = isa<LoadInst>(C[0].Inst);

  // Compute the total number of bytes covered by the chain, counting
  // overlapping elements only once.
  unsigned BytesAdded = DL.getTypeStoreSize(getLoadStoreType(C[0].Inst));
  APInt PrevReadEnd = C[0].OffsetFromLeader + BytesAdded;
  unsigned ChainBytes = BytesAdded;
  for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
    unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(It->Inst));
    APInt ReadEnd = It->OffsetFromLeader + SzBytes;
    BytesAdded =
        PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
    ChainBytes += BytesAdded;
    PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
  }

  assert(8 * ChainBytes % DL.getTypeSizeInBits(VecElemTy) == 0);
  // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be
  // smaller than 1 byte (e.g. VecTy == <32 x i1>).
  unsigned NumElem = 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy);
  auto *VecTy = FixedVectorType::get(VecElemTy, NumElem);
  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  Align Alignment = getLoadStoreAlignment(C[0].Inst);
  // If this is an alloca access, its alignment may have been upgraded
  // earlier; get the most up-to-date value.
  if (AS == DL.getAllocaAddrSpace()) {
    Alignment = std::max(
        Alignment,
        getOrEnforceKnownAlignment(getLoadStorePointerOperand(C[0].Inst),
                                   MaybeAlign(), DL, C[0].Inst, nullptr,
                                   &DT));
  }

#ifndef NDEBUG
  // All elements of the chain must have the same scalar-type store size.
  for (const ChainElem &E : C)
    assert(DL.getTypeStoreSize(getLoadStoreType(E.Inst)->getScalarType()) ==
           DL.getTypeStoreSize(VecElemTy));
#endif

  Instruction *VecInst;
  if (IsLoadChain) {
    // Loads get hoisted to the location of the first load in the chain; their
    // (transitive) operands may need to be hoisted too, see reorder().
    Builder.SetInsertPoint(
        llvm::min_element(C, [](const auto &A, const auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);
    // A chain with extra elements cannot use a plain wide load: emit a masked
    // load instead, enabling only the lanes backed by real loads.
    if (ChainContainsExtraLoadsStores) {
      // ... (build the mask and emit the load via CreateMaskedLoad)
    } else {
      // ... (emit a plain aligned load of VecTy)
    }

    // Replace each scalar (or short-vector) load with extracts from VecInst.
    for (const ChainElem &E : C) {
      Instruction *I = E.Inst;
      Value *V;
      unsigned EOffset =
          (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
      unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
      // ... (extract a single element or a subvector starting at VecIdx)
      if (V->getType() != I->getType())
        V = Builder.CreateBitOrPointerCast(V, I->getType());
      I->replaceAllUsesWith(V);
      // ...
    }
  } else {
    // Stores get sunk to the location of the last store in the chain.
    Builder.SetInsertPoint(
        llvm::max_element(C, [](auto &A, auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // Build the vector to store, one element (or subvector) at a time.
    Value *Vec = PoisonValue::get(VecTy);
    auto InsertElem = [&](Value *V, unsigned VecIdx) {
      if (V->getType() != VecElemTy)
        V = Builder.CreateBitOrPointerCast(V, VecElemTy);
      Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx));
    };
    for (const ChainElem &E : C) {
      auto *I = cast<StoreInst>(E.Inst);
      unsigned EOffset =
          (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
      unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
      if (FixedVectorType *VT =
              dyn_cast<FixedVectorType>(getLoadStoreType(I))) {
        for (int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
          InsertElem(Builder.CreateExtractElement(I->getValueOperand(),
                                                  Builder.getInt32(J)),
                     VecIdx++);
        }
      } else {
        InsertElem(I->getValueOperand(), VecIdx);
      }
    }
    // As with loads, a chain containing extra elements becomes a masked store
    // so that the gap lanes never touch memory.
    if (ChainContainsExtraLoadsStores) {
      // ... (build the mask and emit the store via CreateMaskedStore)
    } else {
      // ... (emit a plain aligned store of Vec)
    }
  }

  propagateMetadata(VecInst, C);

  for (const ChainElem &E : C)
    ToErase.emplace_back(E.Inst);

  ++NumVectorInstructions;
  NumScalarsVectorized += C.size();
  return true;
}
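// End-to-end example (illustrative IR, not from this file): vectorizing a
// two-element load chain
//   %l0 = load i32, ptr %p, align 8
//   %p1 = getelementptr i8, ptr %p, i64 4
//   %l1 = load i32, ptr %p1, align 4
// yields
//   %vec = load <2 x i32>, ptr %p, align 8
//   %v0  = extractelement <2 x i32> %vec, i32 0   ; replaces %l0
//   %v1  = extractelement <2 x i32> %vec, i32 1   ; replaces %l1
// after which the scalar loads are queued on ToErase.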
template <bool IsLoadChain>
bool Vectorizer::isSafeToMove(
    Instruction *ChainElem, Instruction *ChainBegin,
    const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
    BatchAAResults &BatchAA) {
  LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
                    << *ChainBegin << ")\n");

  if (ChainElem == ChainBegin)
    return true;

  // ...

  // Loads are hoisted, so scan backwards from ChainElem to ChainBegin; stores
  // are sunk, so scan forwards.
  auto BBIt = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainElem->getIterator());
    else
      return ChainElem->getIterator();
  }());
  auto BBItEnd = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainBegin->getIterator());
    else
      return ChainBegin->getIterator();
  }());

  const APInt &ChainElemOffset = ChainOffsets.at(ChainElem);
  const unsigned ChainElemSize =
      DL.getTypeStoreSize(getLoadStoreType(ChainElem));

  for (; BBIt != BBItEnd; ++BBIt) {
    Instruction *I = &*BBIt;

    if (!I->mayReadOrWriteMemory())
      continue;

    // ...

    // If I is in the chain, we can tell whether it aliases ChainElem by
    // checking whether the two accesses' offset ranges overlap.
    if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) {
      const APInt &IOffset = OffsetIt->second;
      unsigned IElemSize = DL.getTypeStoreSize(getLoadStoreType(I));
      if (IOffset == ChainElemOffset ||
          (IOffset.sle(ChainElemOffset) &&
           (IOffset + IElemSize).sgt(ChainElemOffset)) ||
          (ChainElemOffset.sle(IOffset) &&
           (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
        LLVM_DEBUG({
          // Double check that AA also sees this alias; if not, we probably
          // have a bug.
          ModRefInfo MR =
              BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
          assert(IsLoadChain ? isModSet(MR) : isModOrRefSet(MR));
          dbgs() << "LSV: Found alias in chain: " << *I << "\n";
        });
        return false; // We found an aliasing instruction; bail.
      }
      continue; // We're confident there's no alias.
    }

    ModRefInfo MR = BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
    if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
      LLVM_DEBUG(dbgs() << "LSV: Found alias in chain:\n"
                        << "  Aliasing instruction:\n"
                        << "    " << *I << '\n'
                        << "  Aliased instruction and pointer:\n"
                        << "    " << *ChainElem << '\n'
                        << "    " << *getLoadStorePointerOperand(ChainElem)
                        << '\n');
      return false;
    }
  }
  return true;
}
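// The overlap test above is plain interval arithmetic on offsets from the
// chain leader: the accesses [IOffset, IOffset + IElemSize) and
// [ChainElemOffset, ChainElemOffset + ChainElemSize) conflict exactly when
// each one starts before the other ends; equal start offsets are the
// degenerate fully-overlapping case.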
static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
                                   unsigned MatchingOpIdxA,
                                   Instruction *AddOpB,
                                   unsigned MatchingOpIdxB, bool Signed) {
  LLVM_DEBUG(dbgs() << "LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
                    << ", AddOpA=" << *AddOpA << ", MatchingOpIdxA="
                    << MatchingOpIdxA << ", AddOpB=" << *AddOpB
                    << ", MatchingOpIdxB=" << MatchingOpIdxB
                    << ", Signed=" << Signed << "\n");
  assert(AddOpA->getOpcode() == Instruction::Add &&
         AddOpB->getOpcode() == Instruction::Add &&
         checkNoWrapFlags(AddOpA, Signed) && checkNoWrapFlags(AddOpB, Signed));
  if (AddOpA->getOperand(MatchingOpIdxA) ==
      AddOpB->getOperand(MatchingOpIdxB)) {
    Value *OtherOperandA = AddOpA->getOperand(MatchingOpIdxA == 1 ? 0 : 1);
    Value *OtherOperandB = AddOpB->getOperand(MatchingOpIdxB == 1 ? 0 : 1);
    Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);
    Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);
    // Match `x +nsw/nuw y` and `x +nsw/nuw (y + IdxDiff)`.
    if (OtherInstrB && OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrB->getOperand(0) == OtherOperandA &&
          IdxDiff.getSExtValue() == CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y - IdxDiff)` and `x +nsw/nuw y`.
    if (OtherInstrA && OtherInstrA->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherOperandB &&
          IdxDiff.getSExtValue() == -CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y + c0)` and `x +nsw/nuw (y + c1)` where
    // c1 - c0 == IdxDiff.
    if (OtherInstrA && OtherInstrB &&
        OtherInstrA->getOpcode() == Instruction::Add &&
        OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        checkNoWrapFlags(OtherInstrB, Signed)) {
      // ...
    }
  }
  return false;
}
std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
                    << " PtrB=" << *PtrB << " ContextInst=" << *ContextInst
                    << " Depth=" << Depth << "\n");

  auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
  auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
  if (!GEPA || !GEPB)
    return getConstantOffsetSelects(PtrA, PtrB, ContextInst, Depth);

  // Look through GEPs after checking they're the same except for the last
  // index.
  if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
      GEPA->getPointerOperand() != GEPB->getPointerOperand())
    return std::nullopt;
  gep_type_iterator GTIA = gep_type_begin(GEPA);
  gep_type_iterator GTIB = gep_type_begin(GEPB);
  for (unsigned I = 0, E = GEPA->getNumIndices() - 1; I < E; ++I) {
    if (GTIA.getOperand() != GTIB.getOperand())
      return std::nullopt;
    ++GTIA;
    ++GTIB;
  }

  Instruction *OpA = dyn_cast<Instruction>(GTIA.getOperand());
  Instruction *OpB = dyn_cast<Instruction>(GTIB.getOperand());
  if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
      OpA->getType() != OpB->getType())
    return std::nullopt;

  uint64_t Stride = GTIA.getSequentialElementStride(DL);

  // Only look through a ZExt/SExt.
  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
    return std::nullopt;
  bool Signed = isa<SExtInst>(OpA);

  // At this point A could be a function parameter, i.e. not an instruction.
  Value *ValA = OpA->getOperand(0);
  OpB = dyn_cast<Instruction>(OpB->getOperand(0));
  if (!OpB || ValA->getType() != OpB->getType())
    return std::nullopt;

  const SCEV *OffsetSCEVA = SE.getSCEV(ValA);
  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
  const SCEV *IdxDiffSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
  if (IdxDiffSCEV == SE.getCouldNotCompute())
    return std::nullopt;

  ConstantRange IdxDiffRange = SE.getSignedRange(IdxDiffSCEV);
  if (!IdxDiffRange.isSingleElement())
    return std::nullopt;
  APInt IdxDiff = *IdxDiffRange.getSingleElement();

  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
                    << "\n");

  // Now we need to prove that adding IdxDiff to ValA won't overflow.
  bool Safe = false;

  // First attempt: check if OpB is an add-with-NSW/NUW of ValA and IdxDiff.
  if (OpB->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(OpB->getOperand(1)) &&
      IdxDiff.sle(cast<ConstantInt>(OpB->getOperand(1))->getSExtValue()) &&
      checkNoWrapFlags(OpB, Signed))
    Safe = true;

  // Second attempt: check for eligible add-NSW/NUW instruction sequences.
  OpA = dyn_cast<Instruction>(ValA);
  if (!Safe && OpA && OpA->getOpcode() == Instruction::Add &&
      OpB->getOpcode() == Instruction::Add && checkNoWrapFlags(OpA, Signed) &&
      checkNoWrapFlags(OpB, Signed)) {
    // A "matching operand" is an operand that is the same in both adds.
    for (unsigned MatchingOpIdxA : {0, 1})
      for (unsigned MatchingOpIdxB : {0, 1})
        if (!Safe)
          Safe = checkIfSafeAddSequence(IdxDiff, OpA, MatchingOpIdxA, OpB,
                                        MatchingOpIdxB, Signed);
  }

  // Third attempt: prove via known bits that the addition cannot overflow.
  if (!Safe) {
    unsigned BitWidth = ValA->getType()->getScalarSizeInBits();
    KnownBits Known(BitWidth);
    computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), Known, DL, &AC,
                     ContextInst, &DT);
    APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
    if (Signed)
      BitsAllowedToBeSet.clearBit(BitWidth - 1);
    Safe = BitsAllowedToBeSet.uge(IdxDiff.abs());
  }

  if (Safe)
    return IdxDiff * Stride;
  return std::nullopt;
}
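// Example (illustrative): for two GEPs differing only in their last index,
//   %a = getelementptr i32, ptr %base, i64 (sext i32 %i to i64)
//   %b = getelementptr i32, ptr %base, i64 (sext i32 (add nsw i32 %i, 1))
// SCEV evaluates IdxDiff = 1, the nsw flag rules out overflow in the add,
// and the function returns IdxDiff * Stride = 1 * 4 = 4 bytes.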
std::optional<APInt> Vectorizer::getConstantOffsetSelects(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  if (Depth++ == MaxDepth)
    return std::nullopt;

  if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
    if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
      if (SelectA->getCondition() != SelectB->getCondition())
        return std::nullopt;
      LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetSelects, PtrA=" << *PtrA
                        << ", PtrB=" << *PtrB << ", ContextInst="
                        << *ContextInst << ", Depth=" << Depth << "\n");
      std::optional<APInt> TrueDiff =
          getConstantOffset(SelectA->getTrueValue(), SelectB->getTrueValue(),
                            ContextInst, Depth);
      if (!TrueDiff)
        return std::nullopt;
      std::optional<APInt> FalseDiff =
          getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
                            ContextInst, Depth);
      if (TrueDiff == FalseDiff)
        return TrueDiff;
    }
  }
  return std::nullopt;
}
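// Example (illustrative): for
//   %pA = select i1 %c, ptr %x,  ptr %y
//   %pB = select i1 %c, ptr %x4, ptr %y4
// where %x4 and %y4 are each 4 bytes past %x and %y, both arms report a
// constant offset of 4, so %pB - %pA == 4 regardless of the runtime value
// of %c.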
void Vectorizer::mergeEquivalenceClasses(
    EquivalenceClassMap &EQClasses) const {
  if (EQClasses.size() < 2) // There is nothing to merge.
    return;

  // The reduced key is the full EqClassKey minus the underlying object.
  static_assert(std::tuple_size_v<EqClassKey> == 4,
                "EqClassKey has changed - EqClassReducedKey needs changes too");
  using EqClassReducedKey =
      std::tuple<std::tuple_element_t<1, EqClassKey> /* AddrSpace */,
                 std::tuple_element_t<2, EqClassKey> /* Element size */,
                 std::tuple_element_t<3, EqClassKey> /* IsLoad */>;
  using ECReducedKeyToUnderlyingObjectMap =
      MapVector<EqClassReducedKey,
                SmallPtrSet<std::tuple_element_t<0, EqClassKey>, 4>>;
  // Group the equivalence classes by reduced key; only reduced keys with more
  // than one underlying object are candidates for merging.
  ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;
  bool FoundPotentiallyOptimizableEC = false;
  for (const auto &EC : EQClasses) {
    const auto &Key = EC.first;
    EqClassReducedKey RedKey{std::get<1>(Key), std::get<2>(Key),
                             std::get<3>(Key)};
    auto &UOMap = RedKeyToUOMap[RedKey];
    UOMap.insert(std::get<0>(Key));
    if (UOMap.size() > 1)
      FoundPotentiallyOptimizableEC = true;
  }
  if (!FoundPotentiallyOptimizableEC)
    return;

  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: before merging:\n";
    for (const auto &EC : EQClasses) {
      dbgs() << "  Key: {" << EC.first << "}\n";
      for (const auto &Inst : EC.second)
        dbgs() << "    Inst: " << *Inst << '\n';
    }
  });

  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n";
    for (const auto &RedKeyToUO : RedKeyToUOMap) {
      dbgs() << "  Reduced key: {" << std::get<0>(RedKeyToUO.first) << ", "
             << std::get<1>(RedKeyToUO.first) << ", "
             << static_cast<int>(std::get<2>(RedKeyToUO.first)) << "} --> "
             << RedKeyToUO.second.size() << " underlying objects:\n";
      for (auto UObject : RedKeyToUO.second)
        dbgs() << "    " << *UObject << '\n';
    }
  });
  using UObjectToUObjectMap = DenseMap<const Value *, const Value *>;

  // Compute the ultimate target for each underlying object by following
  // one-level indirections until a fixed point is reached.
  auto GetUltimateTargets =
      [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {
    UObjectToUObjectMap IndirectionMap;
    for (const auto *UObject : UObjects) {
      const unsigned MaxLookupDepth = 1; // Only one-level indirections.
      const auto *UltimateTarget =
          getUnderlyingObject(UObject, MaxLookupDepth);
      if (UltimateTarget != UObject)
        IndirectionMap[UObject] = UltimateTarget;
    }
    UObjectToUObjectMap UltimateTargetsMap;
    for (const auto *UObject : UObjects) {
      const auto *Target = UObject;
      auto It = IndirectionMap.find(Target);
      for (; It != IndirectionMap.end(); It = IndirectionMap.find(Target))
        Target = It->second;
      UltimateTargetsMap[UObject] = Target;
    }
    return UltimateTargetsMap;
  };
  // For each underlying object that is not its own ultimate target, merge the
  // class keyed on it into the class keyed on the target.
  for (auto &[RedKey, UObjects] : RedKeyToUOMap) {
    if (UObjects.size() < 2)
      continue;
    auto UTMap = GetUltimateTargets(UObjects);
    for (const auto &[UObject, UltimateTarget] : UTMap) {
      if (UObject == UltimateTarget)
        continue;

      EqClassKey KeyFrom{UObject, std::get<0>(RedKey), std::get<1>(RedKey),
                         std::get<2>(RedKey)};
      EqClassKey KeyTo{UltimateTarget, std::get<0>(RedKey),
                       std::get<1>(RedKey), std::get<2>(RedKey)};
      // Merge the instruction lists, keeping the result in BB order.
      const auto &VecTo = EQClasses[KeyTo];
      const auto &VecFrom = EQClasses[KeyFrom];
      SmallVector<Instruction *, 8> MergedVec;
      std::merge(VecFrom.begin(), VecFrom.end(), VecTo.begin(), VecTo.end(),
                 std::back_inserter(MergedVec),
                 [](Instruction *A, Instruction *B) {
                   return A && B && A->comesBefore(B);
                 });
      EQClasses[KeyTo] = std::move(MergedVec);
      EQClasses.erase(KeyFrom);
    }
  }
  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: after merging:\n";
    for (const auto &EC : EQClasses) {
      dbgs() << "  Key: {" << EC.first << "}\n";
      for (const auto &Inst : EC.second)
        dbgs() << "    Inst: " << *Inst << '\n';
    }
  });
}
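// Example (illustrative): when the underlying-object walk stops early (its
// lookup depth is capped), one class can end up keyed on a GEP while another
// is keyed on that GEP's base pointer. The one-level indirection walk above
// maps the former onto the latter, so the two instruction lists are merged
// and their accesses can be chained together.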
EquivalenceClassMap
Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
                                      BasicBlock::iterator End) {
  EquivalenceClassMap Ret;

  auto GetUnderlyingObject = [](const Value *Ptr) -> const Value * {
    const Value *ObjPtr = llvm::getUnderlyingObject(Ptr);
    // Key accesses through selects on the select's condition, so two selects
    // with the same condition and consecutive arms land in the same class.
    if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr))
      return Sel->getCondition();
    return ObjPtr;
  };

  for (Instruction &I : make_range(Begin, End)) {
    auto *LI = dyn_cast<LoadInst>(&I);
    auto *SI = dyn_cast<StoreInst>(&I);
    if (!LI && !SI)
      continue;

    if ((LI && !LI->isSimple()) || (SI && !SI->isSimple()))
      continue;
    // ...

    // Skip weird non-byte sizes; they probably aren't worth the effort.
    Type *Ty = getLoadStoreType(&I);
    unsigned TySize = DL.getTypeSizeInBits(Ty);
    if ((TySize % 8) != 0)
      continue;
    // ...

    Value *Ptr = getLoadStorePointerOperand(&I);
    unsigned AS = Ptr->getType()->getPointerAddressSpace();
    unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
    unsigned VF = VecRegSize / TySize;
    VectorType *VecTy = dyn_cast<VectorType>(Ty);

    // Only handle power-of-two sized elements.
    if ((!VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(Ty))) ||
        (VecTy && !isPowerOf2_32(
                      DL.getTypeSizeInBits(VecTy->getScalarType()))))
      continue;

    // No point in looking at these if they're too big to vectorize.
    if (TySize > VecRegSize / 2 ||
        (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
      continue;

    Ret[{GetUnderlyingObject(Ptr), AS,
         DL.getTypeSizeInBits(getLoadStoreType(&I)->getScalarType()),
         /*IsLoad=*/LI != nullptr}]
        .emplace_back(&I);
  }

  mergeEquivalenceClasses(Ret);
  return Ret;
}
std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
  if (Instrs.empty())
    return {};

  unsigned AS = getLoadStoreAddressSpace(Instrs[0]);
  unsigned ASPtrBits = DL.getIndexSizeInBits(AS);

#ifndef NDEBUG
  // Check that Instrs is in BB order and all have the same addr space.
  for (size_t I = 1; I < Instrs.size(); ++I) {
    assert(Instrs[I - 1]->comesBefore(Instrs[I]));
    assert(getLoadStoreAddressSpace(Instrs[I]) == AS);
  }
#endif
  // Machinery to build up a list of seed chains: keep the most recently used
  // chains in an MRU list and check each new instruction against their
  // leaders.
  struct InstrListElem : ilist_node<InstrListElem>,
                         std::pair<Instruction *, Chain> {
    explicit InstrListElem(Instruction *I)
        : std::pair<Instruction *, Chain>(I, {}) {}
  };
  struct InstrListElemDenseMapInfo {
    using PtrInfo = DenseMapInfo<InstrListElem *>;
    using IInfo = DenseMapInfo<Instruction *>;
    static InstrListElem *getEmptyKey() { return PtrInfo::getEmptyKey(); }
    static InstrListElem *getTombstoneKey() {
      return PtrInfo::getTombstoneKey();
    }
    static unsigned getHashValue(const InstrListElem *E) {
      return IInfo::getHashValue(E->first);
    }
    static bool isEqual(const InstrListElem *A, const InstrListElem *B) {
      if (A == getEmptyKey() || B == getEmptyKey())
        return A == getEmptyKey() && B == getEmptyKey();
      if (A == getTombstoneKey() || B == getTombstoneKey())
        return A == getTombstoneKey() && B == getTombstoneKey();
      return IInfo::isEqual(A->first, B->first);
    }
  };
  SpecificBumpPtrAllocator<InstrListElem> Allocator;
  simple_ilist<InstrListElem> MRU;
  DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;
  // Compare each instruction to the leaders of the N most recently used
  // chains; add it to the first chain whose leader is a constant offset away.
  for (Instruction *I : Instrs) {
    constexpr int MaxChainsToTry = 64;

    bool MatchFound = false;
    auto ChainIter = MRU.begin();
    for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();
         ++J, ++ChainIter) {
      if (std::optional<APInt> Offset = getConstantOffset(
              getLoadStorePointerOperand(ChainIter->first),
              getLoadStorePointerOperand(I),
              /*ContextInst=*/
              (ChainIter->first->comesBefore(I) ? I : ChainIter->first))) {
        ChainIter->second.emplace_back(I, Offset.value());
        // Move the matched chain to the front of the MRU list.
        MRU.remove(*ChainIter);
        MRU.push_front(*ChainIter);
        MatchFound = true;
        break;
      }
    }

    if (!MatchFound) {
      APInt ZeroOffset(ASPtrBits, 0);
      InstrListElem *E = new (Allocator.Allocate()) InstrListElem(I);
      E->second.emplace_back(I, ZeroOffset);
      MRU.push_front(*E);
      Chains.insert(E);
    }
  }

  std::vector<Chain> Ret;
  Ret.reserve(Chains.size());
  // Iterate over MRU rather than Chains to keep the output deterministic.
  for (auto &E : MRU)
    if (E.second.size() > 1)
      Ret.emplace_back(std::move(E.second));
  return Ret;
}
std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
                                                   Instruction *ContextInst,
                                                   unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
                    << ", PtrB=" << *PtrB << ", ContextInst=" << *ContextInst
                    << ", Depth=" << Depth << "\n");
  // We'll ultimately return a value of this bit width, even if computations
  // happen in a different width.
  unsigned OrigBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
  APInt OffsetA(OrigBitWidth, 0);
  APInt OffsetB(OrigBitWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
  unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
  if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
    return std::nullopt;

  // The accumulated offsets must fit in the new width.
  assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
         OffsetB.getSignificantBits() <= NewPtrBitWidth);

  OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
  OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
  if (PtrA == PtrB)
    return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);

  // Try to compute PtrB - PtrA with SCEV.
  const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
  if (DistScev != SE.getCouldNotCompute()) {
    LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
    ConstantRange DistRange = SE.getSignedRange(DistScev);
    if (DistRange.isSingleElement()) {
      APInt Dist = DistRange.getSingleElement()->sextOrTrunc(NewPtrBitWidth);
      return (OffsetB - OffsetA + Dist).sextOrTrunc(OrigBitWidth);
    }
  }

  // Fall back to pattern-matching complex addressing modes.
  if (std::optional<APInt> Diff =
          getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth))
    return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
        .sextOrTrunc(OrigBitWidth);
  return std::nullopt;
}
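// Example of the fast path above (illustrative): with PtrA = %p + 8 and
// PtrB = %p + 20 (both constant in-bounds offsets), stripping reduces both to
// the same base %p with OffsetA = 8 and OffsetB = 20, so the function returns
// OffsetB - OffsetA = 12 without consulting SCEV at all.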
bool Vectorizer::accessIsAllowedAndFast(unsigned SizeBytes, unsigned AS,
                                        Align Alignment,
                                        unsigned VecElemBits) const {
  if (Alignment.value() % SizeBytes == 0)
    return true;

  unsigned VectorizedSpeed = 0;
  bool AllowsMisaligned = TTI.allowsMisalignedMemoryAccesses(
      F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
  if (!AllowsMisaligned) {
    LLVM_DEBUG(
        dbgs() << "LSV: Access of " << SizeBytes << "B in addrspace " << AS
               << " with alignment " << Alignment.value()
               << " is misaligned, and therefore can't be vectorized.\n");
    return false;
  }

  unsigned ElementwiseSpeed = 0;
  TTI.allowsMisalignedMemoryAccesses(F.getContext(), VecElemBits, AS,
                                     Alignment, &ElementwiseSpeed);
  if (VectorizedSpeed < ElementwiseSpeed) {
    LLVM_DEBUG(dbgs() << "LSV: Access of " << SizeBytes << "B in addrspace "
                      << AS << " with alignment " << Alignment.value()
                      << " has relative speed " << VectorizedSpeed
                      << ", which is lower than the elementwise speed of "
                      << ElementwiseSpeed
                      << ". Therefore this access won't be vectorized.\n");
    return false;
  }
  return true;
}
ChainElem Vectorizer::createExtraElementAfter(const ChainElem &Prev, Type *Ty,
                                              APInt Offset, StringRef Prefix,
                                              Align Alignment) {
  Instruction *NewElement = nullptr;
  if (auto *PrevLoad = dyn_cast<LoadInst>(Prev.Inst)) {
    // ... (insert right after Prev)
    Value *NewGep = Builder.CreatePtrAdd(
        PrevLoad->getPointerOperand(), Builder.getInt(Offset), Prefix + "GEP");
    LLVM_DEBUG(dbgs() << "LSV: Extra GEP Created: \n" << *NewGep << "\n");
    // ... (NewElement = an aligned load of Ty through NewGep)
  } else {
    // ... (store case: create the GEP the same way, then an aligned store)
    LLVM_DEBUG(dbgs() << "LSV: Extra GEP Created: \n" << *NewGep << "\n");
    // ...
  }

  // Record the new element so deleteExtraElements() can clean it up if its
  // chain is never vectorized.
  ExtraElements.insert(NewElement);

  APInt NewOffsetFromLeader = Prev.OffsetFromLeader + Offset;
  LLVM_DEBUG(dbgs() << "LSV: Extra Element Created: \n" << *NewElement
                    << " OffsetFromLeader: " << NewOffsetFromLeader << "\n");
  return ChainElem{NewElement, NewOffsetFromLeader};
}
// Builds the lane mask for emitting a chain that contains extra elements as a
// masked load/store: lanes backed by real accesses are enabled, lanes that
// exist only to fill gaps stay off. (The start of this definition is elided
// in this excerpt; its parameter list ends with `FixedVectorType *VecTy`.)
  for (const ChainElem &E : C) {
    // ... (lanes belonging to extra elements are skipped; logic elided)
    unsigned EOffset =
        (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
    unsigned VecIdx =
        8 * EOffset / DL.getTypeSizeInBits(VecTy->getElementType());
    if (FixedVectorType *VT =
            dyn_cast<FixedVectorType>(getLoadStoreType(E.Inst))) {
      for (unsigned J = 0; J < VT->getNumElements(); ++J)
        MaskElts[VecIdx + J] = Builder.getInt1(true);
    } else {
      MaskElts[VecIdx] = Builder.getInt1(true);
    }
  }
  // ...
void Vectorizer::deleteExtraElements() {
  for (auto *ExtraElement : ExtraElements) {
    if (isa<LoadInst>(ExtraElement)) {
      // An extra load that never made it into a vectorized chain has no
      // users, so it (and its GEP) must be trivially dead.
      [[maybe_unused]] bool Deleted =
          RecursivelyDeleteTriviallyDeadInstructions(ExtraElement);
      assert(Deleted && "Extra Load should always be trivially dead");
    } else {
      // ...
      ExtraElement->eraseFromParent();
    }
  }
  ExtraElements.clear();
}
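// A minimal demonstration of the whole pass (a sketch; the IR below is
// assumed, not taken from this pass's tests):
//
//   ; RUN: opt -passes=load-store-vectorizer -S %s
//   define i32 @f(ptr %p) {
//     %a = load i32, ptr %p, align 8
//     %q = getelementptr i8, ptr %p, i64 4
//     %b = load i32, ptr %q, align 4
//     %s = add i32 %a, %b
//     ret i32 %s
//   }
//
// After the pass, the two i32 loads become a single `load <2 x i32>` plus two
// extractelements feeding the add.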