37#include "llvm/IR/IntrinsicsHexagon.h"
61#define DEBUG_TYPE "hexagon-vc"
66#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072
81 MinLoadGroupSizeForAlignment(
"hvc-ld-min-group-size-for-alignment",
84class HexagonVectorCombine {
90 :
F(F_),
DL(
F.getDataLayout()),
AA(AA_), AC(AC_), DT(DT_), SE(SE_),
101 Type *getByteTy(
int ElemCount = 0)
const;
104 Type *getBoolTy(
int ElemCount = 0)
const;
108 std::optional<APInt> getIntValue(
const Value *Val)
const;
114 bool isTrue(
const Value *Val)
const;
116 bool isFalse(
const Value *Val)
const;
125 int getSizeOf(
const Value *Val, SizeKind Kind = Store)
const;
126 int getSizeOf(
const Type *Ty, SizeKind Kind = Store)
const;
127 int getTypeAlignment(
Type *Ty)
const;
128 size_t length(
Value *Val)
const;
129 size_t length(
Type *Ty)
const;
134 int Length,
int Where)
const;
158 unsigned ToWidth)
const;
162 std::optional<int> calculatePointerDifference(
Value *Ptr0,
Value *Ptr1)
const;
164 unsigned getNumSignificantBits(
const Value *V,
171 template <
typename T = std::vector<Instruction *>>
174 const T &IgnoreInsts = {})
const;
177 [[maybe_unused]]
bool isByteVecTy(
Type *Ty)
const;
191 int Start,
int Length)
const;
210 AlignVectors(
const HexagonVectorCombine &HVC_) : HVC(HVC_) {}
215 using InstList = std::vector<Instruction *>;
219 AddrInfo(
const AddrInfo &) =
default;
220 AddrInfo &operator=(
const AddrInfo &) =
default;
223 : Inst(
I), Addr(
A), ValTy(
T), HaveAlign(
H),
224 NeedAlign(HVC.getTypeAlignment(ValTy)) {}
235 using AddrList = std::vector<AddrInfo>;
239 return A->comesBefore(
B);
242 using DepList = std::set<Instruction *, InstrLess>;
245 MoveGroup(
const AddrInfo &AI,
Instruction *
B,
bool Hvx,
bool Load)
246 :
Base(
B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
247 MoveGroup() =
default;
255 using MoveList = std::vector<MoveGroup>;
275 Segment(
Value *Val,
int Begin,
int Len)
276 : Val(Val), Start(Begin),
Size(Len) {}
277 Segment(
const Segment &Seg) =
default;
278 Segment &operator=(
const Segment &Seg) =
default;
285 Block(
Value *Val,
int Len,
int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
287 : Seg(Val, Off, Len), Pos(Pos) {}
295 ByteSpan section(
int Start,
int Length)
const;
296 ByteSpan &shift(
int Offset);
299 int size()
const {
return Blocks.size(); }
300 Block &operator[](
int i) {
return Blocks[i]; }
301 const Block &operator[](
int i)
const {
return Blocks[i]; }
303 std::vector<Block> Blocks;
306 iterator begin() {
return Blocks.begin(); }
307 iterator end() {
return Blocks.end(); }
313 std::optional<AddrInfo> getAddrInfo(
Instruction &In)
const;
314 bool isHvx(
const AddrInfo &AI)
const;
316 [[maybe_unused]]
bool isSectorTy(
Type *Ty)
const;
324 const InstMap &CloneMap = InstMap())
const;
327 const InstMap &CloneMap = InstMap())
const;
351 bool createAddressGroups();
352 MoveList createLoadGroups(
const AddrList &Group)
const;
353 MoveList createStoreGroups(
const AddrList &Group)
const;
354 bool moveTogether(MoveGroup &Move)
const;
355 template <
typename T>
358 void realignLoadGroup(
IRBuilderBase &Builder,
const ByteSpan &VSpan,
359 int ScLen,
Value *AlignVal,
Value *AlignAddr)
const;
360 void realignStoreGroup(
IRBuilderBase &Builder,
const ByteSpan &VSpan,
361 int ScLen,
Value *AlignVal,
Value *AlignAddr)
const;
362 bool realignGroup(
const MoveGroup &Move);
364 int Alignment)
const;
367 AddrGroupMap AddrGroups;
384 const HexagonVectorCombine &HVC;
388 const AlignVectors::AddrGroupMap &AG) {
389 OS <<
"Printing AddrGroups:"
391 for (
auto &It : AG) {
392 OS <<
"\n\tInstruction: ";
394 OS <<
"\n\tAddrInfo: ";
395 for (
auto &AI : It.second)
402 const AlignVectors::AddrList &AL) {
403 OS <<
"\n *** Addr List: ***\n";
404 for (
auto &AG : AL) {
405 OS <<
"\n *** Addr Group: ***\n";
413 const AlignVectors::AddrInfo &AI) {
414 OS <<
"Inst: " << AI.Inst <<
" " << *AI.Inst <<
'\n';
415 OS <<
"Addr: " << *AI.Addr <<
'\n';
416 OS <<
"Type: " << *AI.ValTy <<
'\n';
417 OS <<
"HaveAlign: " << AI.HaveAlign.
value() <<
'\n';
418 OS <<
"NeedAlign: " << AI.NeedAlign.
value() <<
'\n';
419 OS <<
"Offset: " << AI.Offset;
424 const AlignVectors::MoveList &
ML) {
425 OS <<
"\n *** Move List: ***\n";
426 for (
auto &MG :
ML) {
427 OS <<
"\n *** Move Group: ***\n";
435 const AlignVectors::MoveGroup &MG) {
436 OS <<
"IsLoad:" << (MG.IsLoad ?
"yes" :
"no");
437 OS <<
", IsHvx:" << (MG.IsHvx ?
"yes" :
"no") <<
'\n';
440 OS <<
" " << *
I <<
'\n';
443 OS <<
" " << *
I <<
'\n';
445 for (
auto [K, V] : MG.Clones) {
447 K->printAsOperand(OS,
false);
448 OS <<
"\t-> " << *V <<
'\n';
455 OS <<
" @" <<
B.Pos <<
" [" <<
B.Seg.Start <<
',' <<
B.Seg.Size <<
"] ";
456 if (
B.Seg.Val ==
reinterpret_cast<const Value *
>(&
B)) {
457 OS <<
"(self:" <<
B.Seg.Val <<
')';
458 }
else if (
B.Seg.Val !=
nullptr) {
467 const AlignVectors::ByteSpan &BS) {
468 OS <<
"ByteSpan[size=" << BS.size() <<
", extent=" << BS.extent() <<
'\n';
469 for (
const AlignVectors::ByteSpan::Block &
B : BS)
489 HvxIdioms(
const HexagonVectorCombine &HVC_) : HVC(HVC_) {
490 auto *
Int32Ty = HVC.getIntTy(32);
491 HvxI32Ty = HVC.getHvxTy(
Int32Ty,
false);
492 HvxP32Ty = HVC.getHvxTy(
Int32Ty,
true);
513 std::optional<unsigned> RoundAt;
518 -> std::pair<unsigned, Signedness>;
519 auto canonSgn(SValue
X, SValue
Y)
const -> std::pair<SValue, SValue>;
521 auto matchFxpMul(
Instruction &In)
const -> std::optional<FxpOp>;
525 const FxpOp &
Op)
const ->
Value *;
527 bool Rounding)
const ->
Value *;
529 bool Rounding)
const ->
Value *;
532 Value *CarryIn =
nullptr)
const
533 -> std::pair<Value *, Value *>;
538 -> std::pair<Value *, Value *>;
550 std::optional<uint64_t>
553 std::optional<uint64_t> getPHIBaseMinAlignment(
Instruction &In,
564 const HexagonVectorCombine &HVC;
570 const HvxIdioms::FxpOp &
Op) {
571 static const char *SgnNames[] = {
"Positive",
"Signed",
"Unsigned"};
573 if (
Op.RoundAt.has_value()) {
574 if (
Op.Frac != 0 && *
Op.RoundAt ==
Op.Frac - 1) {
577 OS <<
" + 1<<" << *
Op.RoundAt;
580 OS <<
"\n X:(" << SgnNames[
Op.X.Sgn] <<
") " << *
Op.X.Val <<
"\n"
581 <<
" Y:(" << SgnNames[
Op.Y.Sgn] <<
") " << *
Op.Y.Val;
589template <
typename T>
T *getIfUnordered(
T *MaybeT) {
590 return MaybeT && MaybeT->isUnordered() ? MaybeT :
nullptr;
603template <
typename Pred,
typename T>
void erase_if(
T &&container, Pred p) {
641auto AlignVectors::ByteSpan::extent()
const ->
int {
644 int Min = Blocks[0].Pos;
645 int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
646 for (
int i = 1, e =
size(); i !=
e; ++i) {
647 Min = std::min(Min, Blocks[i].Pos);
648 Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
653auto AlignVectors::ByteSpan::section(
int Start,
int Length)
const -> ByteSpan {
655 for (
const ByteSpan::Block &
B : Blocks) {
656 int L = std::max(
B.Pos, Start);
657 int R = std::min(
B.Pos +
B.Seg.Size, Start +
Length);
660 int Off =
L >
B.Pos ?
L -
B.Pos : 0;
661 Section.Blocks.emplace_back(
B.Seg.Val,
B.Seg.Start + Off, R - L, L);
667auto AlignVectors::ByteSpan::shift(
int Offset) -> ByteSpan & {
674 SmallVector<Value *, 8> Values(Blocks.size());
675 for (
int i = 0, e = Blocks.size(); i != e; ++i)
676 Values[i] = Blocks[i].Seg.Val;
687 return Align(
DL.getABITypeAlign(ValTy).value());
690auto AlignVectors::getAddrInfo(Instruction &In)
const
691 -> std::optional<AddrInfo> {
693 return AddrInfo(HVC, L,
L->getPointerOperand(),
L->getType(),
696 return AddrInfo(HVC, S, S->getPointerOperand(),
697 S->getValueOperand()->getType(), S->getAlign());
701 case Intrinsic::masked_load:
702 return AddrInfo(HVC,
II,
II->getArgOperand(0),
II->getType(),
703 II->getParamAlign(0).valueOrOne());
704 case Intrinsic::masked_store:
705 return AddrInfo(HVC,
II,
II->getArgOperand(1),
706 II->getArgOperand(0)->getType(),
707 II->getParamAlign(1).valueOrOne());
713auto AlignVectors::isHvx(
const AddrInfo &AI)
const ->
bool {
717auto AlignVectors::getPayload(
Value *Val)
const ->
Value * {
721 ID =
II->getIntrinsicID();
723 return In->getOperand(0);
728auto AlignVectors::getMask(
Value *Val)
const ->
Value * {
730 switch (
II->getIntrinsicID()) {
731 case Intrinsic::masked_load:
732 return II->getArgOperand(1);
733 case Intrinsic::masked_store:
734 return II->getArgOperand(2);
738 Type *ValTy = getPayload(Val)->getType();
744auto AlignVectors::getPassThrough(
Value *Val)
const ->
Value * {
746 if (
II->getIntrinsicID() == Intrinsic::masked_load)
747 return II->getArgOperand(2);
752auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder,
Value *Ptr,
753 Type *ValTy,
int Adjust,
754 const InstMap &CloneMap)
const
757 if (Instruction *New = CloneMap.lookup(
I))
759 return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust),
"gep");
762auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder,
Value *Ptr,
763 Type *ValTy,
int Alignment,
764 const InstMap &CloneMap)
const
768 for (
auto [Old, New] : CloneMap)
769 I->replaceUsesOfWith(Old, New);
774 Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(),
"pti");
775 Value *
Mask = HVC.getConstInt(-Alignment);
776 Value *
And = Builder.CreateAnd(remap(AsInt), Mask,
"and");
777 return Builder.CreateIntToPtr(
781auto AlignVectors::createLoad(IRBuilderBase &Builder,
Type *ValTy,
Value *Ptr,
788 "Expectning scalar predicate");
789 if (HVC.isFalse(Predicate))
791 if (!HVC.isTrue(Predicate)) {
792 Value *
Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
793 Alignment, MDSources);
794 return Builder.CreateSelect(Mask, Load, PassThru);
798 assert(!HVC.isUndef(Mask));
799 if (HVC.isZero(Mask))
803 if (HVC.isTrue(Mask))
804 return createSimpleLoad(Builder, ValTy, Ptr, EffA.
value(), MDSources);
807 Builder.CreateMaskedLoad(ValTy, Ptr, EffA, Mask, PassThru,
"mld");
813auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder,
Type *ValTy,
814 Value *Ptr,
int Alignment,
818 Instruction *
Load = Builder.CreateAlignedLoad(ValTy, Ptr, EffA,
"ald");
824auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder,
Type *ValTy,
830 "Predicates 'scalar' vector loads not yet supported");
832 assert(!
Predicate->getType()->isVectorTy() &&
"Expectning scalar predicate");
834 assert(HVC.getSizeOf(ValTy, HVC.Alloc) % EffA.
value() == 0);
836 if (HVC.isFalse(Predicate))
838 if (HVC.isTrue(Predicate))
839 return createSimpleLoad(Builder, ValTy, Ptr, EffA.
value(), MDSources);
841 auto V6_vL32b_pred_ai = HVC.HST.
getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
843 return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
844 {
Predicate, Ptr, HVC.getConstInt(0)}, {},
848auto AlignVectors::createStore(IRBuilderBase &Builder,
Value *Val,
Value *Ptr,
851 if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
854 "Expectning scalar predicate"));
856 if (HVC.isFalse(Predicate))
858 if (HVC.isTrue(Predicate))
863 if (HVC.isTrue(Mask)) {
865 return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
869 return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
875 Builder.CreateMaskedStore(Val, Ptr,
Align(Alignment), Mask);
882 Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
883 Predicate, Alignment, MDSources);
884 Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
885 return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,
889auto AlignVectors::createSimpleStore(IRBuilderBase &Builder,
Value *Val,
890 Value *Ptr,
int Alignment,
900auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder,
Value *Val,
907 "Predicates 'scalar' vector stores not yet supported");
909 if (HVC.isFalse(Predicate))
911 if (HVC.isTrue(Predicate))
912 return createSimpleStore(Builder, Val, Ptr, EffA.
value(), MDSources);
914 assert(HVC.getSizeOf(Val, HVC.Alloc) % EffA.
value() == 0);
915 auto V6_vS32b_pred_ai = HVC.HST.
getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
917 return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai,
nullptr,
918 {
Predicate, Ptr, HVC.getConstInt(0), Val}, {},
922auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *
Base)
const
926 "Base and In should be in the same block");
927 assert(
Base->comesBefore(In) &&
"Base should come before In");
930 std::deque<Instruction *> WorkQ = {
In};
931 while (!WorkQ.empty()) {
938 if (
I->getParent() == Parent &&
Base->comesBefore(
I))
946auto AlignVectors::createAddressGroups() ->
bool {
951 auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
952 for (AddrInfo &W : WorkStack) {
953 if (
auto D = HVC.calculatePointerDifference(AI.Addr,
W.Addr))
954 return std::make_pair(
W.Inst, *
D);
956 return std::make_pair(
nullptr, 0);
959 auto traverseBlock = [&](
DomTreeNode *DomN,
auto Visit) ->
void {
961 for (Instruction &
I :
Block) {
962 auto AI = this->getAddrInfo(
I);
965 auto F = findBaseAndOffset(*AI);
967 if (Instruction *BI =
F.first) {
968 AI->Offset =
F.second;
971 WorkStack.push_back(*AI);
972 GroupInst = AI->Inst;
974 AddrGroups[GroupInst].push_back(*AI);
980 while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &
Block)
981 WorkStack.pop_back();
984 traverseBlock(HVC.DT.
getRootNode(), traverseBlock);
985 assert(WorkStack.empty());
989 AddrGroups.
remove_if([](
auto &
G) {
return G.second.size() == 1; });
993 G.second, [&](
auto &
I) { return HVC.HST.isTypeForHVX(I.ValTy); });
997 return !AddrGroups.
empty();
1000auto AlignVectors::createLoadGroups(
const AddrList &Group)
const -> MoveList {
1008 auto tryAddTo = [&](
const AddrInfo &
Info, MoveGroup &Move) {
1009 assert(!Move.Main.empty() &&
"Move group should have non-empty Main");
1011 HVC.ORE.
emit([&]() {
1012 return OptimizationRemarkMissed(
DEBUG_TYPE,
"GroupSizeLimitExceeded",
1013 Info.Inst->getDebugLoc(),
1014 Info.Inst->getParent())
1015 <<
"alignment group exceeds size limit";
1020 if (Move.IsHvx != isHvx(Info))
1024 if (
Base->getParent() !=
Info.Inst->getParent())
1027 if (!HVC.isSafeToMoveBeforeInBB(*
Info.Inst,
Base->getIterator())) {
1028 HVC.ORE.
emit([&]() {
1029 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsafeToRelocate",
1030 Info.Inst->getDebugLoc(),
1031 Info.Inst->getParent())
1032 <<
"unsafe to relocate memory access for alignment";
1038 return HVC.isSafeToMoveBeforeInBB(*
I,
Base->getIterator()) &&
1039 HVC.isSafeToClone(*
I);
1041 DepList Deps = getUpwardDeps(
Info.Inst,
Base);
1045 Move.Main.push_back(
Info.Inst);
1050 MoveList LoadGroups;
1052 for (
const AddrInfo &Info : Group) {
1053 if (!
Info.Inst->mayReadFromMemory())
1055 if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
1056 LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info),
true);
1060 unsigned LoadGroupSizeLimit = MinLoadGroupSizeForAlignment;
1061 erase_if(LoadGroups, [LoadGroupSizeLimit](
const MoveGroup &
G) {
1062 return G.Main.size() < LoadGroupSizeLimit;
1068 llvm::any_of(LoadGroups, [](
const MoveGroup &
G) {
return G.IsHvx; });
1069 erase_if(LoadGroups, [](
const MoveGroup &
G) {
return G.IsHvx; });
1071 HVC.ORE.
emit([&]() {
1072 return OptimizationRemarkMissed(
DEBUG_TYPE,
"HvxVersionTooLow",
1074 <<
"HVX version too low for predicated load operations";
1083auto AlignVectors::createStoreGroups(
const AddrList &Group)
const -> MoveList {
1091 auto tryAddTo = [&](
const AddrInfo &
Info, MoveGroup &Move) {
1092 assert(!Move.Main.empty() &&
"Move group should have non-empty Main");
1094 HVC.ORE.
emit([&]() {
1095 return OptimizationRemarkMissed(
DEBUG_TYPE,
"GroupSizeLimitExceeded",
1096 Info.Inst->getDebugLoc(),
1097 Info.Inst->getParent())
1098 <<
"alignment group exceeds size limit";
1105 "Not handling stores with return values");
1107 if (Move.IsHvx != isHvx(Info))
1113 if (
Base->getParent() !=
Info.Inst->getParent())
1115 if (!HVC.isSafeToMoveBeforeInBB(*
Info.Inst,
Base->getIterator(),
1117 HVC.ORE.
emit([&]() {
1118 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsafeToRelocate",
1119 Info.Inst->getDebugLoc(),
1120 Info.Inst->getParent())
1121 <<
"unsafe to relocate memory access for alignment";
1125 Move.Main.push_back(
Info.Inst);
1129 MoveList StoreGroups;
1131 for (
auto I = Group.rbegin(),
E = Group.rend();
I !=
E; ++
I) {
1132 const AddrInfo &
Info = *
I;
1133 if (!
Info.Inst->mayWriteToMemory())
1135 if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
1136 StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info),
false);
1140 erase_if(StoreGroups, [](
const MoveGroup &
G) {
return G.Main.size() <= 1; });
1145 llvm::any_of(StoreGroups, [](
const MoveGroup &
G) {
return G.IsHvx; });
1146 erase_if(StoreGroups, [](
const MoveGroup &
G) {
return G.IsHvx; });
1148 HVC.ORE.
emit([&]() {
1149 return OptimizationRemarkMissed(
DEBUG_TYPE,
"HvxVersionTooLow",
1151 <<
"HVX version too low for predicated store operations";
1159 if (!VADoFullStores) {
1160 erase_if(StoreGroups, [
this](
const MoveGroup &
G) {
1162 auto MaybeInfo = this->getAddrInfo(*S);
1163 assert(MaybeInfo.has_value());
1164 return HVC.HST.isHVXVectorType(
1165 EVT::getEVT(MaybeInfo->ValTy, false));
1173auto AlignVectors::moveTogether(MoveGroup &Move)
const ->
bool {
1175 assert(!Move.Main.empty() &&
"Move group should have non-empty Main");
1181 Move.Clones = cloneBefore(Where->
getIterator(), Move.Deps);
1184 for (Instruction *M : Main) {
1186 M->moveAfter(Where);
1187 for (
auto [Old, New] : Move.Clones)
1188 M->replaceUsesOfWith(Old, New);
1192 for (
int i = 0, e = Move.Deps.size(); i != e; ++i)
1193 Move.Deps[i] = Move.Clones[Move.Deps[i]];
1198 assert(Move.Deps.empty());
1201 for (Instruction *M : Main.drop_front(1)) {
1207 return Move.Main.size() + Move.Deps.size() > 1;
1210template <
typename T>
1215 for (Instruction *
I : Insts) {
1216 assert(HVC.isSafeToClone(*
I));
1218 C->setName(Twine(
"c.") +
I->getName() +
".");
1219 C->insertBefore(To);
1221 for (
auto [Old, New] : Map)
1222 C->replaceUsesOfWith(Old, New);
1223 Map.insert(std::make_pair(
I,
C));
1228auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
1229 const ByteSpan &VSpan,
int ScLen,
1234 Type *SecTy = HVC.getByteTy(ScLen);
1235 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1236 bool DoAlign = !HVC.isZero(AlignVal);
1238 BasicBlock *BaseBlock = Builder.GetInsertBlock();
1268 for (
int Index = 0;
Index != NumSectors; ++
Index)
1269 ASpan.Blocks.emplace_back(
nullptr, ScLen, Index * ScLen);
1270 for (
int Index = 0;
Index != NumSectors; ++
Index) {
1271 ASpan.Blocks[
Index].Seg.Val =
1272 reinterpret_cast<Value *
>(&ASpan.Blocks[
Index]);
1278 DenseMap<void *, Instruction *> EarliestUser;
1284 assert(
A->getParent() ==
B->getParent());
1285 return A->comesBefore(
B);
1287 auto earliestUser = [&](
const auto &
Uses) {
1289 for (
const Use &U :
Uses) {
1291 assert(
I !=
nullptr &&
"Load used in a non-instruction?");
1295 if (
I->getParent() == BaseBlock) {
1297 User = std::min(User,
I, isEarlier);
1305 for (
const ByteSpan::Block &
B : VSpan) {
1306 ByteSpan ASection = ASpan.section(
B.Pos,
B.Seg.Size);
1307 for (
const ByteSpan::Block &S : ASection) {
1308 auto &EU = EarliestUser[S.Seg.Val];
1309 EU = std::min(EU, earliestUser(
B.Seg.Val->uses()), isEarlier);
1314 dbgs() <<
"ASpan:\n" << ASpan <<
'\n';
1315 dbgs() <<
"Earliest users of ASpan:\n";
1316 for (
auto &[Val, User] : EarliestUser) {
1317 dbgs() << Val <<
"\n ->" << *
User <<
'\n';
1321 auto createLoad = [&](IRBuilderBase &Builder,
const ByteSpan &VSpan,
1322 int Index,
bool MakePred) {
1324 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1326 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1331 int Width = (1 + DoAlign) * ScLen;
1332 return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True,
Undef,
1333 VSpan.section(Start, Width).values());
1338 assert(
In->getParent() == To->getParent());
1339 DepList Deps = getUpwardDeps(&*In, &*To);
1342 InstMap
Map = cloneBefore(In, Deps);
1343 for (
auto [Old, New] : Map)
1344 In->replaceUsesOfWith(Old, New);
1349 for (
int Index = 0;
Index != NumSectors + 1; ++
Index) {
1357 DoAlign &&
Index > 0 ? EarliestUser[&ASpan[
Index - 1]] :
nullptr;
1359 Index < NumSectors ? EarliestUser[&ASpan[
Index]] :
nullptr;
1360 if (
auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
1363 createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
1371 if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
1372 moveBefore(
Load->getIterator(), BasePos);
1374 LLVM_DEBUG(
dbgs() <<
"Loads[" << Index <<
"]:" << *Loads[Index] <<
'\n');
1380 for (
int Index = 0;
Index != NumSectors; ++
Index) {
1381 ASpan[
Index].Seg.Val =
nullptr;
1382 if (
auto *Where = EarliestUser[&ASpan[Index]]) {
1388 assert(NextLoad !=
nullptr);
1389 Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
1391 ASpan[
Index].Seg.Val = Val;
1396 for (
const ByteSpan::Block &
B : VSpan) {
1397 ByteSpan ASection = ASpan.section(
B.Pos,
B.Seg.Size).shift(-
B.Pos);
1404 std::vector<ByteSpan::Block *> ABlocks;
1405 for (ByteSpan::Block &S : ASection) {
1406 if (S.Seg.Val !=
nullptr)
1407 ABlocks.push_back(&S);
1410 [&](
const ByteSpan::Block *
A,
const ByteSpan::Block *
B) {
1414 for (ByteSpan::Block *S : ABlocks) {
1419 Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
1421 HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);
1429 Type *ValTy = getPayload(
B.Seg.Val)->getType();
1432 getPassThrough(
B.Seg.Val),
"sel");
1437auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
1438 const ByteSpan &VSpan,
int ScLen,
1443 Type *SecTy = HVC.getByteTy(ScLen);
1444 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1445 bool DoAlign = !HVC.isZero(AlignVal);
1448 ByteSpan ASpanV, ASpanM;
1452 auto MakeVec = [](IRBuilderBase &Builder,
Value *Val) ->
Value * {
1456 auto *VecTy = VectorType::get(Ty, 1,
false);
1462 for (
int Index = (DoAlign ? -1 : 0);
Index != NumSectors + DoAlign; ++
Index) {
1466 VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
1471 for (ByteSpan::Block &S : VSection) {
1472 Value *Pay = getPayload(S.Seg.Val);
1474 Pay->
getType(), HVC.getByteTy());
1475 Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
1476 S.Seg.Start, S.Seg.Size, S.Pos);
1477 AccumM = Builder.
CreateOr(AccumM, PartM);
1479 Value *PartV = HVC.insertb(Builder,
Undef, HVC.vbytes(Builder, Pay),
1480 S.Seg.Start, S.Seg.Size, S.Pos);
1485 ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
1486 ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);
1490 dbgs() <<
"ASpanV before vlalign:\n" << ASpanV <<
'\n';
1491 dbgs() <<
"ASpanM before vlalign:\n" << ASpanM <<
'\n';
1496 for (
int Index = 1;
Index != NumSectors + 2; ++
Index) {
1497 Value *PrevV = ASpanV[
Index - 1].Seg.Val, *ThisV = ASpanV[
Index].Seg.Val;
1498 Value *PrevM = ASpanM[
Index - 1].Seg.Val, *ThisM = ASpanM[
Index].Seg.Val;
1500 ASpanV[
Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
1501 ASpanM[
Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
1506 dbgs() <<
"ASpanV after vlalign:\n" << ASpanV <<
'\n';
1507 dbgs() <<
"ASpanM after vlalign:\n" << ASpanM <<
'\n';
1510 auto createStore = [&](IRBuilderBase &Builder,
const ByteSpan &ASpanV,
1511 const ByteSpan &ASpanM,
int Index,
bool MakePred) {
1514 if (HVC.isUndef(Val) || HVC.isZero(Mask))
1517 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1519 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1524 int Width = (1 + DoAlign) * ScLen;
1525 this->createStore(Builder, Val, Ptr, Predicate, ScLen,
1526 HVC.vlsb(Builder, Mask),
1527 VSpan.section(Start, Width).values());
1530 for (
int Index = 0;
Index != NumSectors + DoAlign; ++
Index) {
1531 createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
1535auto AlignVectors::realignGroup(
const MoveGroup &Move) ->
bool {
1544 auto getMaxOf = [](
auto Range,
auto GetValue) {
1546 return GetValue(
A) < GetValue(
B);
1550 AddrList &BaseInfos = AddrGroups[Move.Base];
1565 std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
1569 BaseInfos, std::back_inserter(MoveInfos),
1570 [&TestSet](
const AddrInfo &AI) {
return TestSet.count(AI.Inst); });
1573 const AddrInfo &WithMaxAlign =
1574 getMaxOf(MoveInfos, [](
const AddrInfo &AI) {
return AI.HaveAlign; });
1575 Align MaxGiven = WithMaxAlign.HaveAlign;
1578 const AddrInfo &WithMinOffset =
1579 getMaxOf(MoveInfos, [](
const AddrInfo &AI) {
return -AI.Offset; });
1581 const AddrInfo &WithMaxNeeded =
1582 getMaxOf(MoveInfos, [](
const AddrInfo &AI) {
return AI.NeedAlign; });
1583 Align MinNeeded = WithMaxNeeded.NeedAlign;
1596 InstSimplifyFolder(HVC.DL));
1597 Value *AlignAddr =
nullptr;
1598 Value *AlignVal =
nullptr;
1600 if (MinNeeded <= MaxGiven) {
1601 int Start = WithMinOffset.Offset;
1602 int OffAtMax = WithMaxAlign.Offset;
1609 int Adjust = -
alignTo(OffAtMax - Start, MinNeeded.value());
1610 AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
1611 WithMaxAlign.ValTy, Adjust, Move.Clones);
1612 int Diff =
Start - (OffAtMax + Adjust);
1613 AlignVal = HVC.getConstInt(Diff);
1615 assert(
static_cast<decltype(MinNeeded.value())
>(Diff) < MinNeeded.value());
1625 createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
1626 MinNeeded.value(), Move.Clones);
1628 Builder.
CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(),
"pti");
1630 for (
auto [Old, New] : Move.Clones)
1631 I->replaceUsesOfWith(Old, New);
1636 for (
const AddrInfo &AI : MoveInfos) {
1637 VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
1638 AI.Offset - WithMinOffset.Offset);
1645 : std::max<int>(MinNeeded.value(), 4);
1646 assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
1647 assert(Move.IsHvx || ScLen == 4 || ScLen == 8);
1650 dbgs() <<
"ScLen: " << ScLen <<
"\n";
1651 dbgs() <<
"AlignVal:" << *AlignVal <<
"\n";
1652 dbgs() <<
"AlignAddr:" << *AlignAddr <<
"\n";
1653 dbgs() <<
"VSpan:\n" << VSpan <<
'\n';
1657 realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1659 realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1662 HVC.ORE.
emit([&]() {
1663 return OptimizationRemark(
DEBUG_TYPE,
"VectorsAligned",
1665 <<
"aligned vector memory operations";
1668 for (
auto *Inst : Move.Main)
1669 Inst->eraseFromParent();
1674auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder,
Value *AlignVal,
1675 int Alignment)
const ->
Value * {
1676 auto *AlignTy = AlignVal->getType();
1678 AlignVal, ConstantInt::get(AlignTy, Alignment - 1),
"and");
1679 Value *
Zero = ConstantInt::get(AlignTy, 0);
1683auto AlignVectors::isSectorTy(
Type *Ty)
const ->
bool {
1684 if (!HVC.isByteVecTy(Ty))
1686 int Size = HVC.getSizeOf(Ty);
1692auto AlignVectors::run() ->
bool {
1695 if (!createAddressGroups())
1699 dbgs() <<
"Address groups(" << AddrGroups.
size() <<
"):\n";
1700 for (
auto &[In, AL] : AddrGroups) {
1701 for (
const AddrInfo &AI : AL)
1702 dbgs() <<
"---\n" << AI <<
'\n';
1707 MoveList LoadGroups, StoreGroups;
1709 for (
auto &
G : AddrGroups) {
1715 dbgs() <<
"\nLoad groups(" << LoadGroups.size() <<
"):\n";
1716 for (
const MoveGroup &
G : LoadGroups)
1717 dbgs() <<
G <<
"\n";
1718 dbgs() <<
"Store groups(" << StoreGroups.size() <<
"):\n";
1719 for (
const MoveGroup &
G : StoreGroups)
1720 dbgs() <<
G <<
"\n";
1724 unsigned CountLimit = VAGroupCountLimit;
1725 if (CountLimit == 0)
1728 if (LoadGroups.size() > CountLimit) {
1729 LoadGroups.resize(CountLimit);
1730 StoreGroups.clear();
1732 unsigned StoreLimit = CountLimit - LoadGroups.size();
1733 if (StoreGroups.size() > StoreLimit)
1734 StoreGroups.resize(StoreLimit);
1737 for (
auto &M : LoadGroups)
1739 for (
auto &M : StoreGroups)
1744 for (
auto &M : LoadGroups)
1746 for (
auto &M : StoreGroups)
1756auto HvxIdioms::getNumSignificantBits(
Value *V, Instruction *In)
const
1757 -> std::pair<unsigned, Signedness> {
1758 unsigned Bits = HVC.getNumSignificantBits(V, In);
1764 KnownBits Known = HVC.getKnownBits(V, In);
1765 Signedness Sign =
Signed;
1766 unsigned NumToTest = 0;
1770 NumToTest =
Bits - 1;
1783 return {
Bits, Sign};
1786auto HvxIdioms::canonSgn(SValue
X, SValue
Y)
const
1787 -> std::pair<SValue, SValue> {
1800auto HvxIdioms::matchFxpMul(Instruction &In)
const -> std::optional<FxpOp> {
1801 using namespace PatternMatch;
1802 auto *Ty =
In.getType();
1805 return std::nullopt;
1814 auto m_Shr = [](
auto &&
V,
auto &&S) {
1826 if (
Op.Frac > Width)
1827 return std::nullopt;
1834 return std::nullopt;
1842 Op.Opcode = Instruction::Mul;
1844 Op.X.Sgn = getNumSignificantBits(
Op.X.Val, &In).second;
1845 Op.Y.Sgn = getNumSignificantBits(
Op.Y.Val, &In).second;
1850 return std::nullopt;
1853auto HvxIdioms::processFxpMul(Instruction &In,
const FxpOp &
Op)
const
1855 assert(
Op.X.Val->getType() ==
Op.Y.Val->getType());
1858 if (VecTy ==
nullptr)
1861 unsigned ElemWidth = ElemTy->getBitWidth();
1864 if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.
getVectorLength()) != 0)
1874 if (ElemWidth <= 32 &&
Op.Frac == 0)
1877 auto [BitsX, SignX] = getNumSignificantBits(
Op.X.Val, &In);
1878 auto [BitsY, SignY] = getNumSignificantBits(
Op.Y.Val, &In);
1884 InstSimplifyFolder(HVC.DL));
1886 auto roundUpWidth = [](
unsigned Width) ->
unsigned {
1892 if (Width > 32 && Width % 32 != 0) {
1899 BitsX = roundUpWidth(BitsX);
1900 BitsY = roundUpWidth(BitsY);
1905 unsigned Width = std::max(BitsX, BitsY);
1907 auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
1908 if (Width < ElemWidth) {
1911 }
else if (Width > ElemWidth) {
1918 assert(
X->getType() ==
Y->getType() &&
X->getType() == ResizeTy);
1920 unsigned VecLen = HVC.length(ResizeTy);
1921 unsigned ChopLen = (8 * HVC.HST.
getVectorLength()) / std::min(Width, 32u);
1925 ChopOp.ResTy = VectorType::get(
Op.ResTy->getElementType(), ChopLen,
false);
1927 for (
unsigned V = 0;
V != VecLen / ChopLen; ++
V) {
1928 ChopOp.X.Val = HVC.subvector(Builder,
X, V * ChopLen, ChopLen);
1929 ChopOp.Y.Val = HVC.subvector(Builder,
Y, V * ChopLen, ChopLen);
1930 Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
1945inline bool HvxIdioms::matchScatter(Instruction &In)
const {
1949 return (
II->getIntrinsicID() == Intrinsic::masked_scatter);
1952inline bool HvxIdioms::matchGather(Instruction &In)
const {
1956 return (
II->getIntrinsicID() == Intrinsic::masked_gather);
1959inline bool HvxIdioms::matchMLoad(Instruction &In)
const {
1963 return (
II->getIntrinsicID() == Intrinsic::masked_load);
1966inline bool HvxIdioms::matchMStore(Instruction &In)
const {
1970 return (
II->getIntrinsicID() == Intrinsic::masked_store);
1978 case Instruction::Add:
1979 case Instruction::Sub:
1980 case Instruction::Mul:
1981 case Instruction::And:
1982 case Instruction::Or:
1983 case Instruction::Xor:
1984 case Instruction::AShr:
1985 case Instruction::LShr:
1986 case Instruction::Shl:
1987 case Instruction::UDiv:
1995 assert(Ptr &&
"Unable to extract pointer");
2001 if (
II->getIntrinsicID() == Intrinsic::masked_store)
2002 return II->getOperand(1);
2008 HvxIdioms::DstQualifier &Qual) {
2014 Qual = HvxIdioms::LdSt;
2016 if (
II->getIntrinsicID() == Intrinsic::masked_gather) {
2018 Qual = HvxIdioms::LLVM_Gather;
2019 }
else if (
II->getIntrinsicID() == Intrinsic::masked_scatter) {
2021 Qual = HvxIdioms::LLVM_Scatter;
2022 }
else if (
II->getIntrinsicID() == Intrinsic::masked_store) {
2024 Qual = HvxIdioms::LdSt;
2025 }
else if (
II->getIntrinsicID() ==
2026 Intrinsic::hexagon_V6_vgather_vscattermh) {
2028 Qual = HvxIdioms::HEX_Gather_Scatter;
2029 }
else if (
II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) {
2031 Qual = HvxIdioms::HEX_Scatter;
2032 }
else if (
II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) {
2034 Qual = HvxIdioms::HEX_Gather;
2042 Qual = HvxIdioms::Call;
2047 Qual = HvxIdioms::Arithmetic;
2067 for (
auto &U : In->uses()) {
2071 Users.push_back(Destination);
2083 assert(In &&
"Bad instruction");
2087 "Not a gather Intrinsic");
2116 if (
II &&
II->getIntrinsicID() == Intrinsic::masked_gather)
2126 auto *Src = IE->getOperand(1);
2141 LLVM_DEBUG(
dbgs() <<
" Unable to locate Address from intrinsic\n");
2153 if (
II->getIntrinsicID() == Intrinsic::masked_load)
2154 return II->getType();
2155 if (
II->getIntrinsicID() == Intrinsic::masked_store)
2156 return II->getOperand(0)->getType();
2158 return In->getType();
2167 if (
II->getIntrinsicID() == Intrinsic::masked_load)
2169 if (
II->getIntrinsicID() == Intrinsic::masked_gather)
2181 return cstDataVector;
2183 return GEPIndex->getOperand(0);
2199 LLVM_DEBUG(
dbgs() <<
" Unable to locate Index from intrinsic\n");
2209 assert(
I &&
"Unable to reinterprete cast");
2210 Type *NT = HVC.getHvxTy(HVC.getIntTy(32),
false);
2211 std::vector<unsigned> shuffleMask;
2212 for (
unsigned i = 0; i < 64; ++i)
2213 shuffleMask.push_back(i);
2215 Value *CastShuffle =
2216 Builder.CreateShuffleVector(
I,
I, Mask,
"identity_shuffle");
2217 return Builder.CreateBitCast(CastShuffle, NT,
"cst64_i16_to_32_i32");
2224 assert(
I &&
"Unable to reinterprete cast");
2225 Type *NT = HVC.getHvxTy(HVC.getIntTy(32),
false);
2226 std::vector<unsigned> shuffleMask;
2227 for (
unsigned i = 0; i < 128; ++i)
2228 shuffleMask.push_back(i);
2230 Value *CastShuffle =
2231 Builder.CreateShuffleVector(
I,
I, Mask,
"identity_shuffle");
2232 return Builder.CreateBitCast(CastShuffle, NT,
"cst128_i8_to_32_i32");
2238 unsigned int pattern) {
2239 std::vector<unsigned int> byteMask;
2240 for (
unsigned i = 0; i < 32; ++i)
2241 byteMask.push_back(pattern);
2243 return Builder.CreateIntrinsic(
2245 {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},
2249Value *HvxIdioms::processVScatter(Instruction &In)
const {
2251 assert(InpTy &&
"Cannot handle no vector type for llvm.scatter/gather");
2252 unsigned InpSize = HVC.getSizeOf(InpTy);
2253 auto *
F =
In.getFunction();
2254 LLVMContext &Ctx =
F->getContext();
2256 assert(ElemTy &&
"llvm.scatter needs integer type argument");
2259 unsigned Elements = HVC.length(InpTy);
2260 dbgs() <<
"\n[Process scatter](" <<
In <<
")\n" << *
In.getParent() <<
"\n";
2261 dbgs() <<
" Input type(" << *InpTy <<
") elements(" <<
Elements
2262 <<
") VecLen(" << InpSize <<
") type(" << *ElemTy <<
") ElemWidth("
2263 << ElemWidth <<
")\n";
2267 InstSimplifyFolder(HVC.DL));
2269 auto *ValueToScatter =
In.getOperand(0);
2270 LLVM_DEBUG(
dbgs() <<
" ValueToScatter : " << *ValueToScatter <<
"\n");
2274 <<
") for vscatter\n");
2299 Value *CastIndex =
nullptr;
2300 if (cstDataVector) {
2302 Type *IndexVectorType = HVC.getHvxTy(HVC.getIntTy(32),
false);
2303 AllocaInst *IndexesAlloca = Builder.
CreateAlloca(IndexVectorType);
2304 [[maybe_unused]]
auto *StoreIndexes =
2305 Builder.
CreateStore(cstDataVector, IndexesAlloca);
2306 LLVM_DEBUG(
dbgs() <<
" StoreIndexes : " << *StoreIndexes <<
"\n");
2308 Builder.
CreateLoad(IndexVectorType, IndexesAlloca,
"reload_index");
2313 CastIndex = Indexes;
2317 if (ElemWidth == 1) {
2320 Type *
NT = HVC.getHvxTy(HVC.getIntTy(32),
false);
2326 HVC.getHvxTy(HVC.getIntTy(32),
true), V6_vunpack, CastIndexes,
nullptr);
2327 LLVM_DEBUG(
dbgs() <<
" UnpackedIndexes : " << *UnpackedIndexes <<
")\n");
2331 [[maybe_unused]]
Value *IndexHi =
2332 HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
2333 [[maybe_unused]]
Value *IndexLo =
2334 HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
2342 HVC.getHvxTy(HVC.getIntTy(32),
true), V6_vunpack, CastSrc,
nullptr);
2343 LLVM_DEBUG(
dbgs() <<
" UnpackedValToScat: " << *UnpackedValueToScatter
2346 [[maybe_unused]]
Value *UVSHi =
2347 HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
2348 [[maybe_unused]]
Value *UVSLo =
2349 HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);
2354 auto *QByteMask =
get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
2357 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
2363 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
2367 }
else if (ElemWidth == 2) {
2372 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
2376 }
else if (ElemWidth == 4) {
2378 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
2388Value *HvxIdioms::processVGather(Instruction &In)
const {
2389 [[maybe_unused]]
auto *InpTy =
2391 assert(InpTy &&
"Cannot handle no vector type for llvm.gather");
2392 [[maybe_unused]]
auto *ElemTy =
2394 assert(ElemTy &&
"llvm.gather needs vector of ptr argument");
2395 auto *
F =
In.getFunction();
2396 LLVMContext &Ctx =
F->getContext();
2398 << *
In.getParent() <<
"\n");
2400 << HVC.length(InpTy) <<
") VecLen(" << HVC.getSizeOf(InpTy)
2401 <<
") type(" << *ElemTy <<
") Access alignment("
2402 << *
In.getOperand(1) <<
") AddressSpace("
2403 << ElemTy->getAddressSpace() <<
")\n");
2407 "llvm.gather needs vector for mask");
2409 InstSimplifyFolder(HVC.DL));
2414 HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;
2420 LLVM_DEBUG(
dbgs() <<
" Destination : " << *Dst <<
" Qual(" << Qual
2426 LLVM_DEBUG(
dbgs() <<
"Could not locate vgather destination ptr\n");
2432 assert(DstType &&
"Cannot handle non vector dst type for llvm.gather");
2447 Type *
NT = HVC.getHvxTy(HVC.getIntTy(32),
false);
2448 if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
2452 unsigned OutputSize = HVC.getSizeOf(DstType);
2456 <<
" Address space ("
2458 <<
" Result type : " << *DstType
2459 <<
"\n Size in bytes : " << OutputSize
2460 <<
" element type(" << *DstElemTy
2461 <<
")\n ElemWidth : " << ElemWidth <<
" bytes\n");
2464 assert(IndexType &&
"Cannot handle non vector index type for llvm.gather");
2465 unsigned IndexWidth = HVC.DL.
getTypeAllocSize(IndexType->getElementType());
2470 IndexLoad, Type::getInt32Ty(Ctx),
"cst_ptr_to_i32");
2479 if (ElemWidth == 1) {
2484 Value *CastIndexes =
2487 auto *UnpackedIndexes =
2489 V6_vunpack, CastIndexes,
nullptr);
2495 [[maybe_unused]]
Value *IndexHi =
2496 HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
2497 [[maybe_unused]]
Value *IndexLo =
2498 HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
2502 auto *QByteMask =
get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
2506 auto V6_vgather = HVC.HST.
getIntrinsicId(Hexagon::V6_vgathermhq);
2508 Type::getVoidTy(Ctx), V6_vgather,
2509 {Ptr, QByteMask, CastedPtr,
2515 HVC.getHvxTy(HVC.getIntTy(32),
false), Ptr,
"temp_result_hi");
2516 LLVM_DEBUG(
dbgs() <<
" LoadedResultHi : " << *LoadedResultHi <<
"\n");
2521 Type::getVoidTy(Ctx), V6_vgather,
2522 {Ptr, QByteMask, CastedPtr,
2527 HVC.getHvxTy(HVC.getIntTy(32),
false), Ptr,
"temp_result_lo");
2528 LLVM_DEBUG(
dbgs() <<
" LoadedResultLo : " << *LoadedResultLo <<
"\n");
2535 NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo},
nullptr);
2537 [[maybe_unused]]
auto *StoreRes = Builder.
CreateStore(Res, Ptr);
2539 }
else if (ElemWidth == 2) {
2541 if (IndexWidth == 2) {
2549 Value *AdjustedIndex = HVC.createHvxIntrinsic(
2550 Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
2552 <<
" Shifted half index: " << *AdjustedIndex <<
")\n");
2558 Type::getVoidTy(Ctx), V6_vgather,
2562 for (
auto &U : Dst->uses()) {
2564 dbgs() <<
" dst used by: " << *UI <<
"\n";
2566 for (
auto &U :
In.uses()) {
2568 dbgs() <<
" In used by : " << *UI <<
"\n";
2573 HVC.getHvxTy(HVC.getIntTy(16),
false), Ptr,
"temp_result");
2574 LLVM_DEBUG(
dbgs() <<
" LoadedResult : " << *LoadedResult <<
"\n");
2575 In.replaceAllUsesWith(LoadedResult);
2577 dbgs() <<
" Unhandled index type for vgather\n";
2580 }
else if (ElemWidth == 4) {
2581 if (IndexWidth == 4) {
2584 Value *AdjustedIndex = HVC.createHvxIntrinsic(
2585 Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});
2587 <<
" Shifted word index: " << *AdjustedIndex <<
")\n");
2589 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,
2612 Dst->eraseFromParent();
2613 }
else if (Qual == HvxIdioms::LLVM_Scatter) {
2617 assert(DstInpTy &&
"Cannot handle no vector type for llvm.scatter");
2618 unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
2619 unsigned DstElements = HVC.length(DstInpTy);
2621 assert(DstElemTy &&
"llvm.scatter needs vector of ptr argument");
2622 dbgs() <<
" Gather feeds into scatter\n Values to scatter : "
2623 << *Dst->getOperand(0) <<
"\n";
2624 dbgs() <<
" Dst type(" << *DstInpTy <<
") elements(" << DstElements
2625 <<
") VecLen(" << DstInpSize <<
") type(" << *DstElemTy
2626 <<
") Access alignment(" << *Dst->getOperand(2) <<
")\n";
2640 Src, Type::getInt32Ty(Ctx),
"cst_ptr_to_i32");
2660 Value *AdjustedIndex = HVC.createHvxIntrinsic(
2661 Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
2662 LLVM_DEBUG(
dbgs() <<
" Shifted half index: " << *AdjustedIndex <<
")\n");
2665 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
2669 }
else if (Qual == HvxIdioms::HEX_Gather_Scatter) {
2677 if (cstDataVector) {
2682 [[maybe_unused]]
auto *StoreIndexes =
2683 Builder.
CreateStore(cstDataVector, IndexesAlloca);
2684 LLVM_DEBUG(
dbgs() <<
" StoreIndexes : " << *StoreIndexes <<
"\n");
2685 Value *LoadedIndex =
2686 Builder.
CreateLoad(NT, IndexesAlloca,
"reload_index");
2688 LLVM_DEBUG(
dbgs() <<
" ResultAlloca : " << *ResultAlloca <<
"\n");
2691 IndexLoad, Type::getInt32Ty(Ctx),
"cst_ptr_to_i32");
2695 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
2696 {ResultAlloca, CastedSrc,
2700 HVC.getHvxTy(HVC.getIntTy(16),
false), ResultAlloca,
"temp_result");
2701 LLVM_DEBUG(
dbgs() <<
" LoadedResult : " << *LoadedResult <<
"\n");
2703 In.replaceAllUsesWith(LoadedResult);
2713 Src, Type::getInt32Ty(Ctx),
"cst_ptr_to_i32");
2726 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,
2732 }
else if (Qual == HvxIdioms::HEX_Scatter) {
2739 IndexLoad, Type::getInt32Ty(Ctx),
"cst_ptr_to_i32");
2746 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
2751 HVC.getHvxTy(HVC.getIntTy(16),
false), ResultAlloca,
"temp_result");
2752 LLVM_DEBUG(
dbgs() <<
" LoadedResult : " << *LoadedResult <<
"\n");
2753 In.replaceAllUsesWith(LoadedResult);
2754 }
else if (Qual == HvxIdioms::HEX_Gather) {
2759 if (cstDataVector) {
2763 [[maybe_unused]]
auto *StoreIndexes =
2764 Builder.
CreateStore(cstDataVector, IndexesAlloca);
2765 LLVM_DEBUG(
dbgs() <<
" StoreIndexes : " << *StoreIndexes <<
"\n");
2766 Value *LoadedIndex =
2767 Builder.
CreateLoad(NT, IndexesAlloca,
"reload_index");
2770 <<
"\n AddressSpace: "
2774 IndexLoad, Type::getInt32Ty(Ctx),
"cst_ptr_to_i32");
2778 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
2779 {ResultAlloca, CastedSrc,
2783 HVC.getHvxTy(HVC.getIntTy(16),
false), ResultAlloca,
"temp_result");
2784 LLVM_DEBUG(
dbgs() <<
" LoadedResult : " << *LoadedResult <<
"\n");
2786 In.replaceAllUsesWith(LoadedResult);
2789 }
else if (Qual == HvxIdioms::LLVM_Gather) {
2791 errs() <<
" Underimplemented vgather to vgather sequence\n";
2801std::optional<uint64_t> HvxIdioms::getPHIBaseMinAlignment(Instruction &In,
2802 PHINode *PN)
const {
2804 return std::nullopt;
2806 SmallVector<Value *, 16> Worklist;
2807 SmallPtrSet<Value *, 16> Visited;
2808 uint64_t minPHIAlignment = Value::MaximumAlignment;
2811 while (!Worklist.
empty()) {
2814 if (!Visited.
insert(V).second)
2826 if (KnownAlign.
value() < minPHIAlignment)
2827 minPHIAlignment = KnownAlign.
value();
2830 if (minPHIAlignment != Value::MaximumAlignment)
2831 return minPHIAlignment;
2832 return std::nullopt;
2836std::optional<uint64_t> HvxIdioms::getAlignment(Instruction &In,
2838 SmallPtrSet<Value *, 16> Visited;
2839 return getAlignmentImpl(In, ptr, Visited);
2842std::optional<uint64_t>
2843HvxIdioms::getAlignmentImpl(Instruction &In,
Value *ptr,
2844 SmallPtrSet<Value *, 16> &Visited)
const {
2847 if (!Visited.
insert(ptr).second)
2848 return std::nullopt;
2852 if (KnownAlign.
value() > 1) {
2854 return KnownAlign.
value();
2860 auto baseAlignmentOpt = getPHIBaseMinAlignment(In, PN);
2861 if (!baseAlignmentOpt)
2862 return std::nullopt;
2864 uint64_t minBaseAlignment = *baseAlignmentOpt;
2866 if (minBaseAlignment == 1)
2869 uint64_t minPHIAlignment = minBaseAlignment;
2871 <<
")nodes and min base aligned to (" << minBaseAlignment
2878 uint64_t MemberAlignment = Value::MaximumAlignment;
2880 MemberAlignment = *res;
2882 return std::nullopt;
2884 if (minPHIAlignment > MemberAlignment)
2885 minPHIAlignment = MemberAlignment;
2887 LLVM_DEBUG(
dbgs() <<
" total PHI alignment(" << minPHIAlignment <<
")\n");
2888 return minPHIAlignment;
2892 auto *GEPPtr =
GEP->getPointerOperand();
2895 if (&In == GEPPtr) {
2897 GEPPtr->getType()->getPointerAddressSpace()),
2899 if (
GEP->accumulateConstantOffset(HVC.DL,
Offset)) {
2901 <<
Offset.getZExtValue() <<
")\n");
2902 return Offset.getZExtValue();
2907 return std::nullopt;
2910Value *HvxIdioms::processMStore(Instruction &In)
const {
2911 [[maybe_unused]]
auto *InpTy =
2913 assert(InpTy &&
"Cannot handle no vector type for llvm.masked.store");
2916 << *
In.getParent() <<
"\n");
2918 << HVC.length(InpTy) <<
") VecLen(" << HVC.getSizeOf(InpTy)
2919 <<
") type(" << *InpTy->getElementType() <<
") of size("
2920 << InpTy->getScalarSizeInBits() <<
")bits\n");
2922 assert(CI &&
"Expected llvm.masked.store to be a call");
2923 Align HaveAlign = CI->getParamAlign(1).valueOrOne();
2931 Type *ValTy =
In.getOperand(0)->getType();
2935 if (EffA < HaveAlign)
2939 AttrBuilder AttrB(CI->getContext());
2940 AttrB.addAlignmentAttr(EffA);
2942 CI->getAttributes().addParamAttributes(CI->getContext(), 1, AttrB));
2946Value *HvxIdioms::processMLoad(Instruction &In)
const {
2948 assert(InpTy &&
"Cannot handle non vector type for llvm.masked.store");
2950 << *
In.getParent() <<
"\n");
2952 << HVC.length(InpTy) <<
") VecLen(" << HVC.getSizeOf(InpTy)
2953 <<
") type(" << *InpTy->getElementType() <<
") of size("
2954 << InpTy->getScalarSizeInBits() <<
")bits\n");
2956 assert(CI &&
"Expected to be a call to llvm.masked.load");
2958 Align HaveAlign = CI->getParamAlign(0).valueOrOne();
2966 Type *ValTy =
In.getType();
2969 if (EffA < HaveAlign)
2975 AttrBuilder AttrB(CI->getContext());
2976 AttrB.addAlignmentAttr(EffA);
2978 CI->getAttributes().addParamAttributes(CI->getContext(), 0, AttrB));
2982auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
2983 const FxpOp &
Op)
const ->
Value * {
2984 assert(
Op.X.Val->getType() ==
Op.Y.Val->getType());
2986 unsigned Width = InpTy->getScalarSizeInBits();
2989 if (!
Op.RoundAt || *
Op.RoundAt ==
Op.Frac - 1) {
2992 Value *QMul =
nullptr;
2994 QMul = createMulQ15(Builder,
Op.X,
Op.Y, Rounding);
2995 }
else if (Width == 32) {
2996 QMul = createMulQ31(Builder,
Op.X,
Op.Y, Rounding);
2998 if (QMul !=
nullptr)
3004 assert(Width < 32 || Width % 32 == 0);
3014 assert(
Op.Frac != 0 &&
"Unshifted mul should have been skipped");
3015 if (
Op.Frac == 16) {
3017 if (
Value *MulH = createMulH16(Builder,
Op.X,
Op.Y))
3021 Value *Prod32 = createMul16(Builder,
Op.X,
Op.Y);
3024 ConstantInt::get(Prod32->
getType(), 1ull << *
Op.RoundAt);
3025 Prod32 = Builder.
CreateAdd(Prod32, RoundVal,
"add");
3030 ? Builder.
CreateAShr(Prod32, ShiftAmt,
"asr")
3031 : Builder.
CreateLShr(Prod32, ShiftAmt,
"lsr");
3032 return Builder.
CreateTrunc(Shifted, InpTy,
"trn");
3039 auto WordX = HVC.splitVectorElements(Builder,
Op.X.Val, 32);
3040 auto WordY = HVC.splitVectorElements(Builder,
Op.Y.Val, 32);
3041 auto WordP = createMulLong(Builder, WordX,
Op.X.Sgn, WordY,
Op.Y.Sgn);
3046 if (
Op.RoundAt.has_value()) {
3049 RoundV[*
Op.RoundAt / 32] =
3050 ConstantInt::get(HvxWordTy, 1ull << (*
Op.RoundAt % 32));
3051 WordP = createAddLong(Builder, WordP, RoundV);
3057 unsigned SkipWords =
Op.Frac / 32;
3058 Constant *ShiftAmt = ConstantInt::get(HvxWordTy,
Op.Frac % 32);
3060 for (
int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
3061 int Src = Dst + SkipWords;
3063 if (Src + 1 < End) {
3074 WordP.resize(WordP.size() - SkipWords);
3076 return HVC.joinVectorElements(Builder, WordP,
Op.ResTy);
3079auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue
X, SValue
Y,
3080 bool Rounding)
const ->
Value * {
3081 assert(
X.Val->getType() ==
Y.Val->getType());
3082 assert(
X.Val->getType()->getScalarType() == HVC.getIntTy(16));
3089 auto V6_vmpyhvsrs = HVC.HST.
getIntrinsicId(Hexagon::V6_vmpyhvsrs);
3090 return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs,
X.Val->getType(),
3094auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue
X, SValue
Y,
3095 bool Rounding)
const ->
Value * {
3096 Type *InpTy =
X.Val->getType();
3097 assert(InpTy ==
Y.Val->getType());
3109 HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {
X.Val,
Y.Val});
3110 return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
3111 {V1,
X.Val,
Y.Val});
3114auto HvxIdioms::createAddCarry(IRBuilderBase &Builder,
Value *
X,
Value *
Y,
3115 Value *CarryIn)
const
3116 -> std::pair<Value *, Value *> {
3117 assert(
X->getType() ==
Y->getType());
3126 if (CarryIn ==
nullptr)
3128 Args.push_back(CarryIn);
3130 Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
3134 return {
Result, CarryOut};
3141 if (CarryIn !=
nullptr) {
3142 unsigned Width = VecTy->getScalarSizeInBits();
3145 for (
unsigned i = 0, e = 32 / Width; i !=
e; ++i)
3146 Mask = (Mask << Width) | 1;
3150 HVC.createHvxIntrinsic(Builder, V6_vandqrt,
nullptr,
3151 {CarryIn, HVC.getConstInt(Mask)});
3152 Result1 = Builder.
CreateAdd(
X, ValueIn,
"add");
3158 return {Result2, Builder.
CreateOr(CarryOut1, CarryOut2,
"orb")};
3161auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue
X, SValue
Y)
const
3164 std::tie(
X,
Y) = canonSgn(
X,
Y);
3177 HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {
Y.Val,
X.Val});
3179 return HVC.vshuff(Builder, HVC.sublo(Builder,
P), HVC.subhi(Builder,
P));
3182auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue
X, SValue
Y)
const
3184 Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16),
false);
3189 return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,
3194 Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16),
true);
3197 unsigned Len = HVC.length(HvxP16Ty) / 2;
3199 SmallVector<int, 128> PickOdd(Len);
3200 for (
int i = 0; i !=
static_cast<int>(
Len); ++i)
3201 PickOdd[i] = 2 * i + 1;
3204 HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd,
"shf");
3207auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue
X, SValue
Y)
const
3208 -> std::pair<Value *, Value *> {
3209 assert(
X.Val->getType() ==
Y.Val->getType());
3210 assert(
X.Val->getType() == HvxI32Ty);
3213 std::tie(
X,
Y) = canonSgn(
X,
Y);
3216 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
3218 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
3220 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;
3223 Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts,
nullptr,
3224 {
X.Val,
Y.Val}, {HvxI32Ty});
3233 assert(WordX.size() == WordY.size());
3234 unsigned Idx = 0,
Length = WordX.size();
3238 if (HVC.isZero(WordX[Idx]))
3239 Sum[Idx] = WordY[Idx];
3240 else if (HVC.isZero(WordY[Idx]))
3241 Sum[Idx] = WordX[Idx];
3247 Value *Carry =
nullptr;
3248 for (; Idx !=
Length; ++Idx) {
3249 std::tie(Sum[Idx], Carry) =
3250 createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);
3264 for (
int i = 0, e = WordX.size(); i != e; ++i) {
3265 for (
int j = 0, f = WordY.size(); j != f; ++j) {
3267 Signedness SX = (i + 1 ==
e) ? SgnX :
Unsigned;
3269 auto [
Lo,
Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[
j],
SY});
3270 Products[i +
j + 0].push_back(
Lo);
3271 Products[i +
j + 1].push_back(
Hi);
3285 for (
int i = 0, e = Products.size(); i != e; ++i) {
3286 while (Products[i].
size() > 1) {
3287 Value *Carry =
nullptr;
3288 for (
int j = i;
j !=
e; ++
j) {
3289 auto &ProdJ = Products[
j];
3290 auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
3291 pop_back_or_zero(ProdJ), Carry);
3292 ProdJ.insert(ProdJ.begin(), Sum);
3299 for (
auto &
P : Products) {
3300 assert(
P.size() == 1 &&
"Should have been added together");
3307auto HvxIdioms::run() ->
bool {
3310 for (BasicBlock &
B : HVC.F) {
3311 for (
auto It =
B.rbegin(); It !=
B.rend(); ++It) {
3312 if (
auto Fxm = matchFxpMul(*It)) {
3313 Value *
New = processFxpMul(*It, *Fxm);
3319 It->replaceAllUsesWith(New);
3321 It = StartOver ?
B.rbegin()
3324 }
else if (matchGather(*It)) {
3330 It->eraseFromParent();
3334 }
else if (matchScatter(*It)) {
3340 It->eraseFromParent();
3344 }
else if (matchMLoad(*It)) {
3350 }
else if (matchMStore(*It)) {
3365auto HexagonVectorCombine::run() ->
bool {
3367 dbgs() <<
"Module before HexagonVectorCombine\n" << *
F.getParent();
3370 if (HST.useHVXOps()) {
3372 Changed |= AlignVectors(*this).run();
3374 Changed |= HvxIdioms(*this).run();
3378 dbgs() <<
"Module " << (
Changed ?
"(modified)" :
"(unchanged)")
3379 <<
" after HexagonVectorCombine\n"
3385auto HexagonVectorCombine::getIntTy(
unsigned Width)
const -> IntegerType * {
3389auto HexagonVectorCombine::getByteTy(
int ElemCount)
const ->
Type * {
3391 IntegerType *ByteTy = Type::getInt8Ty(
F.getContext());
3394 return VectorType::get(ByteTy, ElemCount,
false);
3397auto HexagonVectorCombine::getBoolTy(
int ElemCount)
const ->
Type * {
3399 IntegerType *BoolTy = Type::getInt1Ty(
F.getContext());
3402 return VectorType::get(BoolTy, ElemCount,
false);
3405auto HexagonVectorCombine::getConstInt(
int Val,
unsigned Width)
const
3410auto HexagonVectorCombine::isZero(
const Value *Val)
const ->
bool {
3412 return C->isNullValue();
3416auto HexagonVectorCombine::getIntValue(
const Value *Val)
const
3417 -> std::optional<APInt> {
3419 return CI->getValue();
3420 return std::nullopt;
3423auto HexagonVectorCombine::isUndef(
const Value *Val)
const ->
bool {
3427auto HexagonVectorCombine::isTrue(
const Value *Val)
const ->
bool {
3431auto HexagonVectorCombine::isFalse(
const Value *Val)
const ->
bool {
3435auto HexagonVectorCombine::getHvxTy(
Type *ElemTy,
bool Pair)
const
3441 "Invalid HVX element type");
3442 unsigned HwLen = HST.getVectorLength();
3444 return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,
3448auto HexagonVectorCombine::getSizeOf(
const Value *Val, SizeKind Kind)
const
3450 return getSizeOf(Val->
getType(), Kind);
3453auto HexagonVectorCombine::getSizeOf(
const Type *Ty, SizeKind Kind)
const
3455 auto *NcTy =
const_cast<Type *
>(Ty);
3458 return DL.getTypeStoreSize(NcTy).getFixedValue();
3460 return DL.getTypeAllocSize(NcTy).getFixedValue();
3465auto HexagonVectorCombine::getTypeAlignment(
Type *Ty)
const ->
int {
3468 if (HST.isTypeForHVX(Ty))
3469 return HST.getVectorLength();
3470 return DL.getABITypeAlign(Ty).value();
3473auto HexagonVectorCombine::length(
Value *Val)
const ->
size_t {
3474 return length(Val->
getType());
3477auto HexagonVectorCombine::length(
Type *Ty)
const ->
size_t {
3479 assert(VecTy &&
"Must be a vector type");
3480 return VecTy->getElementCount().getFixedValue();
3483auto HexagonVectorCombine::simplify(
Value *V)
const ->
Value * {
3485 SimplifyQuery Q(
DL, &TLI, &DT, &AC, In);
3492auto HexagonVectorCombine::insertb(IRBuilderBase &Builder,
Value *Dst,
3494 int Where)
const ->
Value * {
3495 assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
3496 int SrcLen = getSizeOf(Src);
3497 int DstLen = getSizeOf(Dst);
3503 Value *P2Src = vresize(Builder, Src, P2Len,
Poison);
3504 Value *P2Dst = vresize(Builder, Dst, P2Len,
Poison);
3507 for (
int i = 0; i != P2Len; ++i) {
3511 (Where <= i && i < Where +
Length) ? P2Len + Start + (i - Where) : i;
3515 return vresize(Builder, P2Insert, DstLen,
Poison);
3518auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder,
Value *
Lo,
3520 assert(
Lo->getType() ==
Hi->getType() &&
"Argument type mismatch");
3523 int VecLen = getSizeOf(
Hi);
3524 if (
auto IntAmt = getIntValue(Amt))
3525 return getElementRange(Builder,
Lo,
Hi, VecLen - IntAmt->getSExtValue(),
3528 if (HST.isTypeForHVX(
Hi->getType())) {
3529 assert(
static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
3530 "Expecting an exact HVX type");
3531 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
3532 Hi->getType(), {Hi, Lo, Amt});
3540 Builder.
CreateTrunc(Shift, Type::getInt32Ty(
F.getContext()),
"trn");
3545 return vralignb(Builder,
Lo,
Hi,
Sub);
3550auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder,
Value *
Lo,
3552 assert(
Lo->getType() ==
Hi->getType() &&
"Argument type mismatch");
3555 int VecLen = getSizeOf(
Lo);
3556 if (
auto IntAmt = getIntValue(Amt))
3557 return getElementRange(Builder,
Lo,
Hi, IntAmt->getSExtValue(), VecLen);
3559 if (HST.isTypeForHVX(
Lo->getType())) {
3560 assert(
static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
3561 "Expecting an exact HVX type");
3562 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
3563 Lo->getType(), {Hi, Lo, Amt});
3570 Builder.
CreateTrunc(Shift, Type::getInt32Ty(
F.getContext()),
"trn");
3574 Type *Int64Ty = Type::getInt64Ty(
F.getContext());
3586auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
3590 std::vector<Value *> Work[2];
3591 int ThisW = 0, OtherW = 1;
3593 Work[ThisW].assign(Vecs.begin(), Vecs.end());
3594 while (Work[ThisW].
size() > 1) {
3596 SMask.
resize(length(Ty) * 2);
3597 std::iota(SMask.
begin(), SMask.
end(), 0);
3599 Work[OtherW].clear();
3600 if (Work[ThisW].
size() % 2 != 0)
3602 for (
int i = 0, e = Work[ThisW].
size(); i <
e; i += 2) {
3604 Work[ThisW][i], Work[ThisW][i + 1], SMask,
"shf");
3605 Work[OtherW].push_back(Joined);
3613 SMask.
resize(Vecs.size() * length(Vecs.front()->getType()));
3614 std::iota(SMask.
begin(), SMask.
end(), 0);
3619auto HexagonVectorCombine::vresize(IRBuilderBase &Builder,
Value *Val,
3623 assert(ValTy->getElementType() == Pad->getType());
3625 int CurSize = length(ValTy);
3626 if (CurSize == NewSize)
3629 if (CurSize > NewSize)
3630 return getElementRange(Builder, Val, Val, 0, NewSize);
3632 SmallVector<int, 128> SMask(NewSize);
3633 std::iota(SMask.
begin(), SMask.
begin() + CurSize, 0);
3634 std::fill(SMask.
begin() + CurSize, SMask.
end(), CurSize);
3639auto HexagonVectorCombine::rescale(IRBuilderBase &Builder,
Value *Mask,
3646 Type *FromSTy = FromTy->getScalarType();
3647 Type *ToSTy = ToTy->getScalarType();
3648 if (FromSTy == ToSTy)
3651 int FromSize = getSizeOf(FromSTy);
3652 int ToSize = getSizeOf(ToSTy);
3653 assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
3656 int FromCount = length(MaskTy);
3657 int ToCount = (FromCount * FromSize) / ToSize;
3658 assert((FromCount * FromSize) % ToSize == 0);
3660 auto *FromITy =
getIntTy(FromSize * 8);
3661 auto *ToITy =
getIntTy(ToSize * 8);
3666 Mask, VectorType::get(FromITy, FromCount,
false),
"sxt");
3668 Ext, VectorType::get(ToITy, ToCount,
false),
"cst");
3670 Cast, VectorType::get(getBoolTy(), ToCount,
false),
"trn");
3674auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder,
Value *Val)
const
3677 if (ScalarTy == getBoolTy())
3680 Value *Bytes = vbytes(Builder, Val);
3682 return Builder.
CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)),
"trn");
3685 return Builder.
CreateTrunc(Bytes, getBoolTy(),
"trn");
3689auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder,
Value *Val)
const
3692 if (ScalarTy == getByteTy())
3695 if (ScalarTy != getBoolTy())
3696 return Builder.
CreateBitCast(Val, getByteTy(getSizeOf(Val)),
"cst");
3699 return Builder.
CreateSExt(Val, VectorType::get(getByteTy(), VecTy),
"sxt");
3700 return Builder.
CreateSExt(Val, getByteTy(),
"sxt");
3703auto HexagonVectorCombine::subvector(IRBuilderBase &Builder,
Value *Val,
3704 unsigned Start,
unsigned Length)
const
3707 return getElementRange(Builder, Val, Val, Start,
Length);
3710auto HexagonVectorCombine::sublo(IRBuilderBase &Builder,
Value *Val)
const
3712 size_t Len = length(Val);
3713 assert(Len % 2 == 0 &&
"Length should be even");
3714 return subvector(Builder, Val, 0, Len / 2);
3717auto HexagonVectorCombine::subhi(IRBuilderBase &Builder,
Value *Val)
const
3719 size_t Len = length(Val);
3720 assert(Len % 2 == 0 &&
"Length should be even");
3721 return subvector(Builder, Val, Len / 2, Len / 2);
3724auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder,
Value *Val0,
3726 assert(Val0->getType() == Val1->getType());
3727 int Len = length(Val0);
3728 SmallVector<int, 128>
Mask(2 * Len);
3730 for (
int i = 0; i !=
Len; ++i) {
3737auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder,
Value *Val0,
3739 assert(Val0->getType() == Val1->getType());
3740 int Len = length(Val0);
3741 SmallVector<int, 128>
Mask(2 * Len);
3743 for (
int i = 0; i !=
Len; ++i) {
3744 Mask[2 * i + 0] = i;
3750auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
3756 auto getCast = [&](IRBuilderBase &Builder,
Value *Val,
3758 Type *SrcTy = Val->getType();
3759 if (SrcTy == DestTy)
3764 assert(HST.isTypeForHVX(SrcTy,
true));
3766 Type *BoolTy = Type::getInt1Ty(
F.getContext());
3771 unsigned HwLen = HST.getVectorLength();
3772 Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
3773 : Intrinsic::hexagon_V6_pred_typecast_128B;
3782 SmallVector<Value *, 4> IntrArgs;
3783 for (
int i = 0, e =
Args.size(); i != e; ++i) {
3785 Type *
T = IntrTy->getParamType(i);
3786 if (
A->getType() !=
T) {
3792 StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ?
"cup" :
"";
3793 CallInst *
Call = Builder.
CreateCall(IntrFn, IntrArgs, MaybeName);
3800 if (RetTy ==
nullptr || CallTy == RetTy)
3803 assert(HST.isTypeForHVX(CallTy,
true));
3804 return getCast(Builder,
Call, RetTy);
3807auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
3809 unsigned ToWidth)
const
3824 assert(VecTy->getElementType()->isIntegerTy());
3825 unsigned FromWidth = VecTy->getScalarSizeInBits();
3827 assert(ToWidth <= FromWidth &&
"Breaking up into wider elements?");
3828 unsigned NumResults = FromWidth / ToWidth;
3832 unsigned Length = length(VecTy);
3836 auto splitInHalf = [&](
unsigned Begin,
unsigned End,
auto splitFunc) ->
void {
3840 if (Begin + 1 == End)
3846 auto *VTy = VectorType::get(
getIntTy(Width / 2), 2 *
Length,
false);
3849 Value *Res =
vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));
3851 unsigned Half = (Begin + End) / 2;
3852 Results[Begin] = sublo(Builder, Res);
3853 Results[Half] = subhi(Builder, Res);
3855 splitFunc(Begin, Half, splitFunc);
3856 splitFunc(Half, End, splitFunc);
3859 splitInHalf(0, NumResults, splitInHalf);
3863auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
3865 VectorType *ToType)
const
3867 assert(ToType->getElementType()->isIntegerTy());
3878 unsigned ToWidth = ToType->getScalarSizeInBits();
3879 unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
3880 assert(Width <= ToWidth);
3882 unsigned Length = length(Inputs.front()->getType());
3884 unsigned NeedInputs = ToWidth / Width;
3885 if (Inputs.size() != NeedInputs) {
3890 Last, ConstantInt::get(
Last->getType(), Width - 1),
"asr");
3891 Inputs.resize(NeedInputs, Sign);
3894 while (Inputs.size() > 1) {
3897 for (
int i = 0, e = Inputs.size(); i < e; i += 2) {
3898 Value *Res =
vshuff(Builder, Inputs[i], Inputs[i + 1]);
3901 Inputs.resize(Inputs.size() / 2);
3904 assert(Inputs.front()->getType() == ToType);
3905 return Inputs.front();
3908auto HexagonVectorCombine::calculatePointerDifference(
Value *Ptr0,
3910 -> std::optional<int> {
3912 const SCEV *Scev0 = SE.getSCEV(Ptr0);
3913 const SCEV *Scev1 = SE.getSCEV(Ptr1);
3914 const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
3916 APInt
V =
Const->getAPInt();
3917 if (
V.isSignedIntN(8 *
sizeof(
int)))
3918 return static_cast<int>(
V.getSExtValue());
3925 I->eraseFromParent();
3927 SmallVector<Instruction *, 8> ToErase;
3930#define CallBuilder(B, F) \
3933 if (auto *I = dyn_cast<Instruction>(V)) \
3934 B_.ToErase.push_back(I); \
3938 auto Simplify = [
this](
Value *
V) {
3944 auto StripBitCast = [](
Value *
V) {
3946 V =
C->getOperand(0);
3950 Ptr0 = StripBitCast(Ptr0);
3951 Ptr1 = StripBitCast(Ptr1);
3953 return std::nullopt;
3957 if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
3958 return std::nullopt;
3959 if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
3960 return std::nullopt;
3962 Builder
B(Gep0->getParent());
3963 int Scale = getSizeOf(Gep0->getSourceElementType(),
Alloc);
3966 if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
3967 return std::nullopt;
3969 Value *Idx0 = Gep0->getOperand(1);
3970 Value *Idx1 = Gep1->getOperand(1);
3975 return Diff->getSExtValue() * Scale;
3977 KnownBits Known0 = getKnownBits(Idx0, Gep0);
3978 KnownBits Known1 = getKnownBits(Idx1, Gep1);
3981 return std::nullopt;
3989 Diff0 =
C->getSExtValue();
3991 return std::nullopt;
4000 Diff1 =
C->getSExtValue();
4002 return std::nullopt;
4005 return (Diff0 + Diff1) * Scale;
4010auto HexagonVectorCombine::getNumSignificantBits(
const Value *V,
4011 const Instruction *CtxI)
const
4016auto HexagonVectorCombine::getKnownBits(
const Value *V,
4017 const Instruction *CtxI)
const
4022auto HexagonVectorCombine::isSafeToClone(
const Instruction &In)
const ->
bool {
4023 if (
In.mayHaveSideEffects() ||
In.isAtomic() ||
In.isVolatile() ||
4024 In.isFenceLike() ||
In.mayReadOrWriteMemory()) {
4032template <
typename T>
4033auto HexagonVectorCombine::isSafeToMoveBeforeInBB(
const Instruction &In,
4035 const T &IgnoreInsts)
const
4038 [
this](
const Instruction &
I) -> std::optional<MemoryLocation> {
4040 switch (
II->getIntrinsicID()) {
4041 case Intrinsic::masked_load:
4043 case Intrinsic::masked_store:
4059 bool MayWrite =
In.mayWriteToMemory();
4060 auto MaybeLoc = getLocOrNone(In);
4062 auto From =
In.getIterator();
4065 bool MoveUp = (To !=
Block.end() && To->comesBefore(&In));
4067 MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
4068 for (
auto It =
Range.first; It !=
Range.second; ++It) {
4069 const Instruction &I = *It;
4070 if (llvm::is_contained(IgnoreInsts, &I))
4073 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
4074 if (II->getIntrinsicID() == Intrinsic::assume)
4081 if (!CB->hasFnAttr(Attribute::WillReturn))
4083 if (!CB->hasFnAttr(Attribute::NoSync))
4086 if (
I.mayReadOrWriteMemory()) {
4087 auto MaybeLocI = getLocOrNone(I);
4088 if (MayWrite || I.mayWriteToMemory()) {
4089 if (!MaybeLoc || !MaybeLocI)
4091 if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
4099auto HexagonVectorCombine::isByteVecTy(
Type *Ty)
const ->
bool {
4101 return VecTy->getElementType() == getByteTy();
4105auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder,
Value *
Lo,
4109 SmallVector<int, 128> SMask(
Length);
4110 std::iota(SMask.
begin(), SMask.
end(), Start);
4117class HexagonVectorCombineLegacy :
public FunctionPass {
4121 HexagonVectorCombineLegacy() : FunctionPass(
ID) {}
4123 StringRef getPassName()
const override {
return "Hexagon Vector Combine"; }
4125 void getAnalysisUsage(AnalysisUsage &AU)
const override {
4133 AU.
addRequired<OptimizationRemarkEmitterWrapperPass>();
4134 FunctionPass::getAnalysisUsage(AU);
4138 if (skipFunction(
F))
4140 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
4141 AssumptionCache &AC =
4142 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
4143 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
4144 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
4145 TargetLibraryInfo &TLI =
4146 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
F);
4147 auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
4148 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
4149 HexagonVectorCombine HVC(
F, AA, AC, DT, SE, TLI, TM, ORE);
4155char HexagonVectorCombineLegacy::ID = 0;
4158 "Hexagon Vector Combine",
false,
false)
4170 return new HexagonVectorCombineLegacy();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static IntegerType * getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
static Value * locateIndexesFromIntrinsic(Instruction *In)
Instruction * locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual)
Value * getReinterpretiveCast_i8_to_i32(const HexagonVectorCombine &HVC, IRBuilderBase &Builder, LLVMContext &Ctx, Value *I)
static Value * locateIndexesFromGEP(Value *In)
#define CallBuilder(B, F)
Value * getPointer(Value *Ptr)
#define DEFAULT_HVX_VTCM_PAGE_SIZE
static Value * locateAddressFromIntrinsic(Instruction *In)
static Instruction * selectDestination(Instruction *In, HvxIdioms::DstQualifier &Qual)
Value * get_i32_Mask(const HexagonVectorCombine &HVC, IRBuilderBase &Builder, LLVMContext &Ctx, unsigned int pattern)
bool isArithmetic(unsigned Opc)
static Type * getIndexType(Value *In)
GetElementPtrInst * locateGepFromIntrinsic(Instruction *In)
Value * getReinterpretiveCast_i16_to_i32(const HexagonVectorCombine &HVC, IRBuilderBase &Builder, LLVMContext &Ctx, Value *I)
static Align effectiveAlignForValueTy(const DataLayout &DL, Type *ValTy, int Requested)
iv Induction Variable Users
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static bool isCandidate(const MachineInstr *MI, Register &DefedReg, Register FrameReg)
static bool isUndef(const MachineInstr &MI)
This file implements a map that provides insertion order iteration.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Remove Loads Into Fake Uses
static ConstantInt * getConstInt(MDNode *MD, unsigned NumOp)
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Target-Independent Code Generator Pass Configuration Options pass.
static uint32_t getAlignment(const MCSectionCOFF &Sec)
static const uint32_t IV[8]
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
unsigned getAddressSpace() const
Return the address space for the allocation.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represent a constant reference to an array (0 or more elements consecutively in memory),...
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
InstListType::const_iterator const_iterator
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
AttributeList getAttributes() const
Return the attributes for this call.
@ ICMP_ULT
unsigned less than
static LLVM_ABI Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
iterator_range< iterator > children()
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
const BasicBlock & front() const
const BasicBlock & back() const
DISubprogram * getSubprogram() const
Get the attached subprogram.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * getPointerOperand()
bool isHVXVectorType(EVT VecTy, bool IncludeBool=false) const
bool useHVXV62Ops() const
bool useHVXV69Ops() const
unsigned getVectorLength() const
bool useHVXV66Ops() const
bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const
Intrinsic::ID getIntrinsicId(unsigned Opc) const
Common base class shared among various IRBuilders.
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const char * getOpcodeName() const
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
This class implements a map that also provides access to all stored values in a deterministic order.
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
bool doesNotAccessMemory() const
Whether this function accesses no memory.
bool onlyAccessesInaccessibleMem() const
Whether this function only (at most) accesses inaccessible memory.
static LLVM_ABI std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
The main scalar evolution driver.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Rounding
Possible values of current rounding mode, which is specified in bits 23:22 of FPCR.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI Instruction * getTerminator() const
LLVM_ABI Instruction & front() const
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createHexagonVectorCombineLegacyPass()
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
DomTreeNodeBase< BasicBlock > DomTreeNode
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instructions I depend values not reachable through...
MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.