#include "llvm/IR/IntrinsicsHexagon.h"

#define DEBUG_TYPE "hexagon-vc"
#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072
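// Note: 131072 bytes = 128 KiB, used here as the assumed default VTCM page
// size on HVX targets.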
class HexagonVectorCombine {
      : F(F_), DL(F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_),

  Type *getByteTy(int ElemCount = 0) const;
  Type *getBoolTy(int ElemCount = 0) const;
  std::optional<APInt> getIntValue(const Value *Val) const;
  bool isTrue(const Value *Val) const;
  bool isFalse(const Value *Val) const;
  int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
  int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
  int getTypeAlignment(Type *Ty) const;
  size_t length(Value *Val) const;
  size_t length(Type *Ty) const;
                 int Length, int Where) const;
                                        unsigned ToWidth) const;
  std::optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;
  unsigned getNumSignificantBits(const Value *V,
  template <typename T = std::vector<Instruction *>>
                              const T &IgnoreInsts = {}) const;
  [[maybe_unused]] bool isByteVecTy(Type *Ty) const;
                         int Start, int Length) const;
  AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}

  using InstList = std::vector<Instruction *>;

    AddrInfo(const AddrInfo &) = default;
    AddrInfo &operator=(const AddrInfo &) = default;
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}

  using AddrList = std::vector<AddrInfo>;

      return A->comesBefore(B);

  using DepList = std::set<Instruction *, InstrLess>;

    MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
        : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
    MoveGroup() = default;

  using MoveList = std::vector<MoveGroup>;

      Segment(Value *Val, int Begin, int Len)
          : Val(Val), Start(Begin), Size(Len) {}
      Segment(const Segment &Seg) = default;
      Segment &operator=(const Segment &Seg) = default;

      Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
          : Seg(Val, Off, Len), Pos(Pos) {}

    ByteSpan section(int Start, int Length) const;
    ByteSpan &shift(int Offset);

    int size() const { return Blocks.size(); }
    Block &operator[](int i) { return Blocks[i]; }
    const Block &operator[](int i) const { return Blocks[i]; }

    std::vector<Block> Blocks;

    iterator begin() { return Blocks.begin(); }
    iterator end() { return Blocks.end(); }
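  // A ByteSpan models a collection of byte ranges: each Block pins Seg.Size
  // bytes of Seg.Val (starting at byte Seg.Start within that value) to byte
  // position Pos within the span. Illustrative sketch with hypothetical
  // numbers: a Block {Val, /*Start=*/4, /*Len=*/8} at Pos = 16 says "bytes
  // 4..11 of Val occupy span bytes 16..23", so extent() would report at
  // least 16 + 8 = 24 bytes.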
  std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
  bool isHvx(const AddrInfo &AI) const;
  [[maybe_unused]] bool isSectorTy(Type *Ty) const;
                          const InstMap &CloneMap = InstMap()) const;
                          const InstMap &CloneMap = InstMap()) const;
  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool moveTogether(MoveGroup &Move) const;
  template <typename T>
  void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int ScLen, Value *AlignVal, Value *AlignAddr) const;
  void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                         int ScLen, Value *AlignVal, Value *AlignAddr) const;
  bool realignGroup(const MoveGroup &Move);
                               int Alignment) const;

  AddrGroupMap AddrGroups;
  const HexagonVectorCombine &HVC;

                         const AlignVectors::AddrGroupMap &AG) {
  OS << "Printing AddrGroups:"
  for (auto &It : AG) {
    OS << "\n\tInstruction: ";
    OS << "\n\tAddrInfo: ";
    for (auto &AI : It.second)

                         const AlignVectors::AddrList &AL) {
  OS << "\n *** Addr List: ***\n";
  for (auto &AG : AL) {
    OS << "\n *** Addr Group: ***\n";

                         const AlignVectors::AddrInfo &AI) {
  OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
  OS << "Addr: " << *AI.Addr << '\n';
  OS << "Type: " << *AI.ValTy << '\n';
  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
  OS << "Offset: " << AI.Offset;

                         const AlignVectors::MoveList &ML) {
  OS << "\n *** Move List: ***\n";
  for (auto &MG : ML) {
    OS << "\n *** Move Group: ***\n";

                         const AlignVectors::MoveGroup &MG) {
  OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
  OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
    OS << "  " << *I << '\n';
    OS << "  " << *I << '\n';
  for (auto [K, V] : MG.Clones) {
    K->printAsOperand(OS, false);
    OS << "\t-> " << *V << '\n';

  OS << "  @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
  if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {
    OS << "(self:" << B.Seg.Val << ')';
  } else if (B.Seg.Val != nullptr) {

                         const AlignVectors::ByteSpan &BS) {
  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
  for (const AlignVectors::ByteSpan::Block &B : BS)
  HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
    auto *Int32Ty = HVC.getIntTy(32);
    HvxI32Ty = HVC.getHvxTy(Int32Ty, false);
    HvxP32Ty = HVC.getHvxTy(Int32Ty, true);

    std::optional<unsigned> RoundAt;

      -> std::pair<unsigned, Signedness>;
  auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;
  auto matchFxpMul(Instruction &In) const -> std::optional<FxpOp>;
                     const FxpOp &Op) const -> Value *;
                    bool Rounding) const -> Value *;
                    bool Rounding) const -> Value *;
                      Value *CarryIn = nullptr) const
      -> std::pair<Value *, Value *>;
      -> std::pair<Value *, Value *>;
  std::optional<uint64_t>
  std::optional<uint64_t> getPHIBaseMinAlignment(Instruction &In,

  const HexagonVectorCombine &HVC;

                         const HvxIdioms::FxpOp &Op) {
  static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
  if (Op.RoundAt.has_value()) {
    if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
      OS << " + 1<<" << *Op.RoundAt;
  OS << "\n X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
     << " Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;

template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;

template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
auto AlignVectors::ByteSpan::extent() const -> int {
  int Min = Blocks[0].Pos;
  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
  for (int i = 1, e = size(); i != e; ++i) {
    Min = std::min(Min, Blocks[i].Pos);
    Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);

auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  for (const ByteSpan::Block &B : Blocks) {
    int L = std::max(B.Pos, Start);
    int R = std::min(B.Pos + B.Seg.Size, Start + Length);
    int Off = L > B.Pos ? L - B.Pos : 0;
    Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);

auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
  SmallVector<Value *, 8> Values(Blocks.size());
  for (int i = 0, e = Blocks.size(); i != e; ++i)
    Values[i] = Blocks[i].Seg.Val;

  return Align(DL.getABITypeAlign(ValTy).value());
auto AlignVectors::getAddrInfo(Instruction &In) const
    -> std::optional<AddrInfo> {
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      II->getParamAlign(0).valueOrOne());
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      II->getParamAlign(1).valueOrOne());
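// Address info is extracted from the four access kinds this pass understands:
// plain loads, plain stores, and the llvm.masked.load / llvm.masked.store
// intrinsics, whose pointer is argument 0 and argument 1 respectively.
// Anything else yields std::nullopt.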
auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {

auto AlignVectors::getPayload(Value *Val) const -> Value * {
      ID = II->getIntrinsicID();
      return In->getOperand(0);

auto AlignVectors::getMask(Value *Val) const -> Value * {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(1);
    case Intrinsic::masked_store:
      return II->getArgOperand(2);

  Type *ValTy = getPayload(Val)->getType();

auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getArgOperand(2);

auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
                                         Type *ValTy, int Adjust,
                                         const InstMap &CloneMap) const
    if (Instruction *New = CloneMap.lookup(I))
  return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust), "gep");

auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
                                        Type *ValTy, int Alignment,
                                        const InstMap &CloneMap) const
      for (auto [Old, New] : CloneMap)
        I->replaceUsesOfWith(Old, New);
  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
  Value *Mask = HVC.getConstInt(-Alignment);
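  // Two's-complement trick: -Alignment has all bits set above log2(Alignment),
  // so AND-ing rounds the address down to a multiple of Alignment. Worked
  // example: Alignment = 128 gives Mask = ...FFFFFF80, and 0x1234 & Mask =
  // 0x1200.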
  Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
  return Builder.CreateIntToPtr(

auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
         "Expecting scalar predicate");
  if (HVC.isFalse(Predicate))
  if (!HVC.isTrue(Predicate)) {
    Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
                                       Alignment, MDSources);
    return Builder.CreateSelect(Mask, Load, PassThru);
  assert(!HVC.isUndef(Mask));
  if (HVC.isZero(Mask))
  if (HVC.isTrue(Mask))
    return createSimpleLoad(Builder, ValTy, Ptr, EffA.value(), MDSources);
      Builder.CreateMaskedLoad(ValTy, Ptr, EffA, Mask, PassThru, "mld");
auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,
                                    Value *Ptr, int Alignment,
  Instruction *Load = Builder.CreateAlignedLoad(ValTy, Ptr, EffA, "ald");

auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,
         "Predicated 'scalar' vector loads not yet supported");
  assert(!Predicate->getType()->isVectorTy() &&
         "Expecting scalar predicate");
  assert(HVC.getSizeOf(ValTy, HVC.Alloc) % EffA.value() == 0);
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
    return createSimpleLoad(Builder, ValTy, Ptr, EffA.value(), MDSources);
  auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
  return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
                                {Predicate, Ptr, HVC.getConstInt(0)}, {},

auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
         "Expecting scalar predicate"));
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
  if (HVC.isTrue(Mask)) {
    return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
  return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
      Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
  Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
                                         Predicate, Alignment, MDSources);
  Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
  return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,

auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,
                                     Value *Ptr, int Alignment,

auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,
         "Predicated 'scalar' vector stores not yet supported");
  if (HVC.isFalse(Predicate))
  if (HVC.isTrue(Predicate))
    return createSimpleStore(Builder, Val, Ptr, EffA.value(), MDSources);
  assert(HVC.getSizeOf(Val, HVC.Alloc) % EffA.value() == 0);
  auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
  return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,
                                {Predicate, Ptr, HVC.getConstInt(0), Val}, {},
auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
         "Base and In should be in the same block");
  assert(Base->comesBefore(In) && "Base should come before In");

  std::deque<Instruction *> WorkQ = {In};
  while (!WorkQ.empty()) {
      if (I->getParent() == Parent && Base->comesBefore(I))

auto AlignVectors::createAddressGroups() -> bool {
  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    return std::make_pair(nullptr, 0);

  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
    for (Instruction &I : Block) {
      auto AI = this->getAddrInfo(I);
      auto F = findBaseAndOffset(*AI);
      if (Instruction *BI = F.first) {
        AI->Offset = F.second;
        WorkStack.push_back(*AI);
        GroupInst = AI->Inst;
      AddrGroups[GroupInst].push_back(*AI);

    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();

  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());
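// The recursive traverseBlock lambda walks the dominator tree depth-first,
// carrying WorkStack as a scoped stack of candidate base addresses: entries
// pushed in a block stay visible in dominated blocks and are popped when the
// traversal leaves the block, which is why WorkStack must drain completely
// once the root call returns.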
  AddrGroups.remove_if([](auto &G) { return G.second.size() == 1; });
      G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
  return !AddrGroups.empty();

auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    if (Move.IsHvx != isHvx(Info))
    if (Base->getParent() != Info.Inst->getParent())
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))
      return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&
             HVC.isSafeToClone(*I);
    DepList Deps = getUpwardDeps(Info.Inst, Base);
    Move.Main.push_back(Info.Inst);

  MoveList LoadGroups;
  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())
    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);

  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });
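// Grouping is greedy: each load either extends the most recent group (when
// tryAddTo accepts it) or starts a new one. Singleton groups are dropped as
// unprofitable; the second erase_if additionally filters out HVX groups.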
auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
           "Not handling stores with return values");
    if (Move.IsHvx != isHvx(Info))
    if (Base->getParent() != Info.Inst->getParent())
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(),
                                    Move.Main))
    Move.Main.push_back(Info.Inst);

  MoveList StoreGroups;
  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())
    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);

  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });
  if (!VADoFullStores) {
    erase_if(StoreGroups, [this](const MoveGroup &G) {
        auto MaybeInfo = this->getAddrInfo(*S);
        assert(MaybeInfo.has_value());
        return HVC.HST.isHVXVectorType(
            EVT::getEVT(MaybeInfo->ValTy, false));

auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {
  assert(!Move.Main.empty() && "Move group should have non-empty Main");
    Move.Clones = cloneBefore(Where->getIterator(), Move.Deps);
    for (Instruction *M : Main) {
      M->moveAfter(Where);
      for (auto [Old, New] : Move.Clones)
        M->replaceUsesOfWith(Old, New);
    for (int i = 0, e = Move.Deps.size(); i != e; ++i)
      Move.Deps[i] = Move.Clones[Move.Deps[i]];
    assert(Move.Deps.empty());
    for (Instruction *M : Main.drop_front(1)) {

  return Move.Main.size() + Move.Deps.size() > 1;
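// A move counts as profitable only if it actually brought more than one
// instruction together: Main plus any cloned dependencies must exceed one,
// otherwise nothing was regrouped and the caller treats this as "no change".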
template <typename T>
  for (Instruction *I : Insts) {
    assert(HVC.isSafeToClone(*I));
    C->setName(Twine("c.") + I->getName() + ".");
    C->insertBefore(To);
    for (auto [Old, New] : Map)
      C->replaceUsesOfWith(Old, New);
    Map.insert(std::make_pair(I, C));

auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
                                    const ByteSpan &VSpan, int ScLen,
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);
  BasicBlock *BaseBlock = Builder.GetInsertBlock();

  for (int Index = 0; Index != NumSectors; ++Index)
    ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan.Blocks[Index].Seg.Val =
        reinterpret_cast<Value *>(&ASpan.Blocks[Index]);

  DenseMap<void *, Instruction *> EarliestUser;
    assert(A->getParent() == B->getParent());
    return A->comesBefore(B);
  auto earliestUser = [&](const auto &Uses) {
    for (const Use &U : Uses) {
      assert(I != nullptr && "Load used in a non-instruction?");
      if (I->getParent() == BaseBlock) {
          User = std::min(User, I, isEarlier);

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
    for (const ByteSpan::Block &S : ASection) {
      auto &EU = EarliestUser[S.Seg.Val];
      EU = std::min(EU, earliestUser(B.Seg.Val->uses()), isEarlier);

    dbgs() << "ASpan:\n" << ASpan << '\n';
    dbgs() << "Earliest users of ASpan:\n";
    for (auto &[Val, User] : EarliestUser) {
      dbgs() << Val << "\n ->" << *User << '\n';

  auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int Index, bool MakePred) {
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
    int Width = (1 + DoAlign) * ScLen;
    return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,
                            VSpan.section(Start, Width).values());
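  // Each sector load reads ScLen bytes, but when realignment is needed
  // (DoAlign), adjacent loads are combined with vralignb, so the values that
  // feed a section span (1 + DoAlign) * ScLen bytes. E.g. with ScLen = 128
  // and DoAlign set, a section draws from a 256-byte window of VSpan.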
    assert(In->getParent() == To->getParent());
    DepList Deps = getUpwardDeps(&*In, &*To);
    InstMap Map = cloneBefore(In, Deps);
    for (auto [Old, New] : Map)
      In->replaceUsesOfWith(Old, New);

  for (int Index = 0; Index != NumSectors + 1; ++Index) {
        DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
        Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
    if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
          createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
      if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
        moveBefore(Load->getIterator(), BasePos);
    LLVM_DEBUG(dbgs() << "Loads[" << Index << "]:" << *Loads[Index] << '\n');

  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan[Index].Seg.Val = nullptr;
    if (auto *Where = EarliestUser[&ASpan[Index]]) {
        assert(NextLoad != nullptr);
        Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
      ASpan[Index].Seg.Val = Val;

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
    std::vector<ByteSpan::Block *> ABlocks;
    for (ByteSpan::Block &S : ASection) {
      if (S.Seg.Val != nullptr)
        ABlocks.push_back(&S);
              [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {
    for (ByteSpan::Block *S : ABlocks) {
      Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
          HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);
    Type *ValTy = getPayload(B.Seg.Val)->getType();
                               getPassThrough(B.Seg.Val), "sel");
auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
                                     const ByteSpan &VSpan, int ScLen,
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);

  ByteSpan ASpanV, ASpanM;

  auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {
    auto *VecTy = VectorType::get(Ty, 1, false);

  for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign;
       ++Index) {
        VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
    for (ByteSpan::Block &S : VSection) {
      Value *Pay = getPayload(S.Seg.Val);
                                 Pay->getType(), HVC.getByteTy());
      Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
                                 S.Seg.Start, S.Seg.Size, S.Pos);
      AccumM = Builder.CreateOr(AccumM, PartM);
      Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
                                 S.Seg.Start, S.Seg.Size, S.Pos);
    ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
    ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);

    dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';

  for (int Index = 1; Index != NumSectors + 2; ++Index) {
    Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
    Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
    ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
    ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);

    dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';

  auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
                         const ByteSpan &ASpanM, int Index, bool MakePred) {
    if (HVC.isUndef(Val) || HVC.isZero(Mask))
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
    int Width = (1 + DoAlign) * ScLen;
    this->createStore(Builder, Val, Ptr, Predicate, ScLen,
                      HVC.vlsb(Builder, Mask),
                      VSpan.section(Start, Width).values());
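  // Mirror image of the load path: values and masks are shifted across
  // sector boundaries with vlalignb, then each sector is stored through a
  // single (possibly predicated) masked store whose mask keeps neighboring
  // bytes intact.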
  for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {
    createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);

auto AlignVectors::realignGroup(const MoveGroup &Move) -> bool {
  auto getMaxOf = [](auto Range, auto GetValue) {
      return GetValue(A) < GetValue(B);

  AddrList &BaseInfos = AddrGroups[Move.Base];
  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  const AddrInfo &WithMaxAlign =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;

  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });

  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;

                                 InstSimplifyFolder(HVC.DL));
  Value *AlignAddr = nullptr;
  Value *AlignVal = nullptr;

  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust, Move.Clones);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
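    // Worked example with hypothetical numbers: Start = -5, OffAtMax = 3,
    // MinNeeded = 8 gives Adjust = -alignTo(8, 8) = -8, and then
    // Diff = -5 - (3 - 8) = 0, consistent with the assertion just above
    // that Diff stays below MinNeeded.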
        createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
                             MinNeeded.value(), Move.Clones);
        Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");
      for (auto [Old, New] : Move.Clones)
        I->replaceUsesOfWith(Old, New);

  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);

                   : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

    dbgs() << "ScLen: " << ScLen << "\n";
    dbgs() << "AlignVal:" << *AlignVal << "\n";
    dbgs() << "AlignAddr:" << *AlignAddr << "\n";
    dbgs() << "VSpan:\n" << VSpan << '\n';

    realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
    realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);

  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();

auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
                                       int Alignment) const -> Value * {
  auto *AlignTy = AlignVal->getType();
      AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");
  Value *Zero = ConstantInt::get(AlignTy, 0);

auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
  if (!HVC.isByteVecTy(Ty))
  int Size = HVC.getSizeOf(Ty);

auto AlignVectors::run() -> bool {
  if (!createAddressGroups())

    dbgs() << "Address groups(" << AddrGroups.size() << "):\n";
    for (auto &[In, AL] : AddrGroups) {
      for (const AddrInfo &AI : AL)
        dbgs() << "---\n" << AI << '\n';

  MoveList LoadGroups, StoreGroups;
  for (auto &G : AddrGroups) {

    dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";
    for (const MoveGroup &G : LoadGroups)
      dbgs() << G << "\n";
    dbgs() << "Store groups(" << StoreGroups.size() << "):\n";
    for (const MoveGroup &G : StoreGroups)
      dbgs() << G << "\n";

  unsigned CountLimit = VAGroupCountLimit;
  if (CountLimit == 0)

  if (LoadGroups.size() > CountLimit) {
    LoadGroups.resize(CountLimit);
    StoreGroups.clear();
    unsigned StoreLimit = CountLimit - LoadGroups.size();
    if (StoreGroups.size() > StoreLimit)
      StoreGroups.resize(StoreLimit);

  for (auto &M : LoadGroups)
  for (auto &M : StoreGroups)
  for (auto &M : LoadGroups)
  for (auto &M : StoreGroups)
auto HvxIdioms::getNumSignificantBits(Value *V, Instruction *In) const
    -> std::pair<unsigned, Signedness> {
  unsigned Bits = HVC.getNumSignificantBits(V, In);
  KnownBits Known = HVC.getKnownBits(V, In);
  Signedness Sign = Signed;
  unsigned NumToTest = 0;
    NumToTest = Bits - 1;
  return {Bits, Sign};

auto HvxIdioms::canonSgn(SValue X, SValue Y) const
    -> std::pair<SValue, SValue> {

auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
  using namespace PatternMatch;
  auto *Ty = In.getType();
    return std::nullopt;
  auto m_Shr = [](auto &&V, auto &&S) {
  if (Op.Frac > Width)
    return std::nullopt;
    return std::nullopt;
  Op.Opcode = Instruction::Mul;
  Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
  Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;
    return std::nullopt;

auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  if (VecTy == nullptr)
  unsigned ElemWidth = ElemTy->getBitWidth();
  if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
  if (ElemWidth <= 32 && Op.Frac == 0)

  auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
  auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);
                                 InstSimplifyFolder(HVC.DL));
  auto roundUpWidth = [](unsigned Width) -> unsigned {
    if (Width > 32 && Width % 32 != 0) {
  BitsX = roundUpWidth(BitsX);
  BitsY = roundUpWidth(BitsY);

  unsigned Width = std::max(BitsX, BitsY);
  auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
  if (Width < ElemWidth) {
  } else if (Width > ElemWidth) {
  assert(X->getType() == Y->getType() && X->getType() == ResizeTy);

  unsigned VecLen = HVC.length(ResizeTy);
  unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);
  ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);
  for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
    ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
    ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
    Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
inline bool HvxIdioms::matchScatter(Instruction &In) const {
  return (II->getIntrinsicID() == Intrinsic::masked_scatter);

inline bool HvxIdioms::matchGather(Instruction &In) const {
  return (II->getIntrinsicID() == Intrinsic::masked_gather);

inline bool HvxIdioms::matchMLoad(Instruction &In) const {
  return (II->getIntrinsicID() == Intrinsic::masked_load);

inline bool HvxIdioms::matchMStore(Instruction &In) const {
  return (II->getIntrinsicID() == Intrinsic::masked_store);

  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:

  assert(Ptr && "Unable to extract pointer");
    if (II->getIntrinsicID() == Intrinsic::masked_store)
      return II->getOperand(1);

                           HvxIdioms::DstQualifier &Qual) {
    Qual = HvxIdioms::LdSt;
    if (II->getIntrinsicID() == Intrinsic::masked_gather) {
      Qual = HvxIdioms::LLVM_Gather;
    } else if (II->getIntrinsicID() == Intrinsic::masked_scatter) {
      Qual = HvxIdioms::LLVM_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::masked_store) {
      Qual = HvxIdioms::LdSt;
    } else if (II->getIntrinsicID() ==
               Intrinsic::hexagon_V6_vgather_vscattermh) {
      Qual = HvxIdioms::HEX_Gather_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) {
      Qual = HvxIdioms::HEX_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) {
      Qual = HvxIdioms::HEX_Gather;
    Qual = HvxIdioms::Call;
    Qual = HvxIdioms::Arithmetic;

  for (auto &U : In->uses()) {
      Users.push_back(Destination);

  assert(In && "Bad instruction");
         "Not a gather Intrinsic");
    if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
    auto *Src = IE->getOperand(1);
  LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");

    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getType();
    if (II->getIntrinsicID() == Intrinsic::masked_store)
      return II->getOperand(0)->getType();
  return In->getType();

    if (II->getIntrinsicID() == Intrinsic::masked_load)
    if (II->getIntrinsicID() == Intrinsic::masked_gather)
        return cstDataVector;
      return GEPIndex->getOperand(0);
  LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");
  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  std::vector<unsigned> shuffleMask;
  for (unsigned i = 0; i < 64; ++i)
    shuffleMask.push_back(i);
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32");

  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  std::vector<unsigned> shuffleMask;
  for (unsigned i = 0; i < 128; ++i)
    shuffleMask.push_back(i);
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32");

                           unsigned int pattern) {
  std::vector<unsigned int> byteMask;
  for (unsigned i = 0; i < 32; ++i)
    byteMask.push_back(pattern);
  return Builder.CreateIntrinsic(
      {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},
Value *HvxIdioms::processVScatter(Instruction &In) const {
  assert(InpTy && "Cannot handle non vector type for llvm.scatter/gather");
  unsigned InpSize = HVC.getSizeOf(InpTy);
  auto *F = In.getFunction();
  LLVMContext &Ctx = F->getContext();
  assert(ElemTy && "llvm.scatter needs integer type argument");
  unsigned Elements = HVC.length(InpTy);
  dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
  dbgs() << " Input type(" << *InpTy << ") elements(" << Elements
         << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
         << ElemWidth << ")\n";
                                 InstSimplifyFolder(HVC.DL));

  auto *ValueToScatter = In.getOperand(0);
  LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n");
             << ") for vscatter\n");

  Value *CastIndex = nullptr;
  if (cstDataVector) {
    AllocaInst *IndexesAlloca =
        Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false));
    [[maybe_unused]] auto *StoreIndexes =
        Builder.CreateStore(cstDataVector, IndexesAlloca);
    LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
                                       IndexesAlloca, "reload_index");
    CastIndex = Indexes;

  if (ElemWidth == 1) {
    Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes,
        nullptr);
    LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n");
    [[maybe_unused]] Value *IndexHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
    [[maybe_unused]] Value *IndexLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr);
    LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter
    [[maybe_unused]] Value *UVSHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
    [[maybe_unused]] Value *UVSLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);

    auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
  } else if (ElemWidth == 2) {
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
  } else if (ElemWidth == 4) {
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
Value *HvxIdioms::processVGather(Instruction &In) const {
  [[maybe_unused]] auto *InpTy =
  assert(InpTy && "Cannot handle non vector type for llvm.gather");
  [[maybe_unused]] auto *ElemTy =
  assert(ElemTy && "llvm.gather needs vector of ptr argument");
  auto *F = In.getFunction();
  LLVMContext &Ctx = F->getContext();
             << *In.getParent() << "\n");
             << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
             << ") type(" << *ElemTy << ") Access alignment("
             << *In.getOperand(1) << ") AddressSpace("
             << ElemTy->getAddressSpace() << ")\n");
         "llvm.gather needs vector for mask");
                                 InstSimplifyFolder(HVC.DL));
  HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;
  LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual
    LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");
  assert(DstType && "Cannot handle non vector dst type for llvm.gather");

  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
    unsigned OutputSize = HVC.getSizeOf(DstType);
               << " Address space ("
               << " Result type : " << *DstType
               << "\n Size in bytes : " << OutputSize
               << " element type(" << *DstElemTy
               << ")\n ElemWidth : " << ElemWidth << " bytes\n");
    assert(IndexType && "Cannot handle non vector index type for llvm.gather");
    unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

    if (ElemWidth == 1) {
      Value *CastIndexes =
      auto *UnpackedIndexes =
          V6_vunpack, CastIndexes, nullptr);
      [[maybe_unused]] Value *IndexHi =
          HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
      [[maybe_unused]] Value *IndexLo =
          HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
      auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
      auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);
          Type::getVoidTy(Ctx), V6_vgather,
          {Ptr, QByteMask, CastedPtr,
          HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi");
      LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");
          Type::getVoidTy(Ctx), V6_vgather,
          {Ptr, QByteMask, CastedPtr,
          HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo");
      LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");
          NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);
      [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr);
    } else if (ElemWidth == 2) {
      if (IndexWidth == 2) {
        Value *AdjustedIndex = HVC.createHvxIntrinsic(
            Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
                   << " Shifted half index: " << *AdjustedIndex << ")\n");
            Type::getVoidTy(Ctx), V6_vgather,
        for (auto &U : Dst->uses()) {
          dbgs() << " dst used by: " << *UI << "\n";
        for (auto &U : In.uses()) {
          dbgs() << " In used by : " << *UI << "\n";
            HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result");
        LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
        In.replaceAllUsesWith(LoadedResult);
        dbgs() << " Unhandled index type for vgather\n";
    } else if (ElemWidth == 4) {
      if (IndexWidth == 4) {
        Value *AdjustedIndex = HVC.createHvxIntrinsic(
            Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});
                   << " Shifted word index: " << *AdjustedIndex << ")\n");
            Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,
    Dst->eraseFromParent();
  } else if (Qual == HvxIdioms::LLVM_Scatter) {
    assert(DstInpTy && "Cannot handle non vector type for llvm.scatter");
    unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
    unsigned DstElements = HVC.length(DstInpTy);
    assert(DstElemTy && "llvm.scatter needs vector of ptr argument");
    dbgs() << " Gather feeds into scatter\n Values to scatter : "
           << *Dst->getOperand(0) << "\n";
    dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements
           << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy
           << ") Access alignment(" << *Dst->getOperand(2) << ")\n";
        Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
    Value *AdjustedIndex = HVC.createHvxIntrinsic(
        Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
    LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
  } else if (Qual == HvxIdioms::HEX_Gather_Scatter) {
    if (cstDataVector) {
      [[maybe_unused]] auto *StoreIndexes =
          Builder.CreateStore(cstDataVector, IndexesAlloca);
      LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
    LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
        {ResultAlloca, CastedSrc,
        HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
    LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
    In.replaceAllUsesWith(LoadedResult);
        Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,
  } else if (Qual == HvxIdioms::HEX_Scatter) {
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
        HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
    LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
    In.replaceAllUsesWith(LoadedResult);
  } else if (Qual == HvxIdioms::HEX_Gather) {
    if (cstDataVector) {
      [[maybe_unused]] auto *StoreIndexes =
          Builder.CreateStore(cstDataVector, IndexesAlloca);
      LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
               << "\n AddressSpace: "
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
        {ResultAlloca, CastedSrc,
        HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
    LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
    In.replaceAllUsesWith(LoadedResult);
  } else if (Qual == HvxIdioms::LLVM_Gather) {
    errs() << " Underimplemented vgather to vgather sequence\n";
std::optional<uint64_t> HvxIdioms::getPHIBaseMinAlignment(Instruction &In,
                                                          PHINode *PN) const {
    return std::nullopt;

  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 16> Visited;
  uint64_t minPHIAlignment = Value::MaximumAlignment;

  while (!Worklist.empty()) {
    if (!Visited.insert(V).second)
      if (KnownAlign.value() < minPHIAlignment)
        minPHIAlignment = KnownAlign.value();

  if (minPHIAlignment != Value::MaximumAlignment)
    return minPHIAlignment;
  return std::nullopt;

std::optional<uint64_t> HvxIdioms::getAlignment(Instruction &In,
  SmallPtrSet<Value *, 16> Visited;
  return getAlignmentImpl(In, ptr, Visited);

std::optional<uint64_t>
HvxIdioms::getAlignmentImpl(Instruction &In, Value *ptr,
                            SmallPtrSet<Value *, 16> &Visited) const {
  if (!Visited.insert(ptr).second)
    return std::nullopt;

  if (KnownAlign.value() > 1) {
    return KnownAlign.value();

    auto baseAlignmentOpt = getPHIBaseMinAlignment(In, PN);
    if (!baseAlignmentOpt)
      return std::nullopt;
    uint64_t minBaseAlignment = *baseAlignmentOpt;
    if (minBaseAlignment == 1)
    uint64_t minPHIAlignment = minBaseAlignment;
               << ")nodes and min base aligned to (" << minBaseAlignment
      uint64_t MemberAlignment = Value::MaximumAlignment;
        MemberAlignment = *res;
        return std::nullopt;
      if (minPHIAlignment > MemberAlignment)
        minPHIAlignment = MemberAlignment;
    LLVM_DEBUG(dbgs() << " total PHI alignment(" << minPHIAlignment << ")\n");
    return minPHIAlignment;

    auto *GEPPtr = GEP->getPointerOperand();
    if (&In == GEPPtr) {
                     GEPPtr->getType()->getPointerAddressSpace()),
      if (GEP->accumulateConstantOffset(HVC.DL, Offset)) {
                   << Offset.getZExtValue() << ")\n");
        return Offset.getZExtValue();
  return std::nullopt;
Value *HvxIdioms::processMStore(Instruction &In) const {
  [[maybe_unused]] auto *InpTy =
  assert(InpTy && "Cannot handle non vector type for llvm.masked.store");
             << *In.getParent() << "\n");
             << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
             << ") type(" << *InpTy->getElementType() << ") of size("
             << InpTy->getScalarSizeInBits() << ")bits\n");
  assert(CI && "Expected llvm.masked.store to be a call");
  Align HaveAlign = CI->getParamAlign(1).valueOrOne();
  Type *ValTy = In.getOperand(0)->getType();
  if (EffA < HaveAlign)
  AttrBuilder AttrB(CI->getContext());
  AttrB.addAlignmentAttr(EffA);
      CI->getAttributes().addParamAttributes(CI->getContext(), 1, AttrB));

Value *HvxIdioms::processMLoad(Instruction &In) const {
  assert(InpTy && "Cannot handle non vector type for llvm.masked.load");
             << *In.getParent() << "\n");
             << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
             << ") type(" << *InpTy->getElementType() << ") of size("
             << InpTy->getScalarSizeInBits() << ")bits\n");
  assert(CI && "Expected to be a call to llvm.masked.load");
  Align HaveAlign = CI->getParamAlign(0).valueOrOne();
  Type *ValTy = In.getType();
  if (EffA < HaveAlign)
  AttrBuilder AttrB(CI->getContext());
  AttrB.addAlignmentAttr(EffA);
      CI->getAttributes().addParamAttributes(CI->getContext(), 0, AttrB));
auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
                                     const FxpOp &Op) const -> Value * {
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  unsigned Width = InpTy->getScalarSizeInBits();

  if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {
    Value *QMul = nullptr;
      QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);
    } else if (Width == 32) {
      QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);
    if (QMul != nullptr)

  assert(Width < 32 || Width % 32 == 0);

  assert(Op.Frac != 0 && "Unshifted mul should have been skipped");
  if (Op.Frac == 16) {
    if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))
    Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
          ConstantInt::get(Prod32->getType(), 1ull << *Op.RoundAt);
      Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");
            ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")
            : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");
    return Builder.CreateTrunc(Shifted, InpTy, "trn");

  auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, 32);
  auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, 32);
  auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);

  if (Op.RoundAt.has_value()) {
    RoundV[*Op.RoundAt / 32] =
        ConstantInt::get(HvxWordTy, 1ull << (*Op.RoundAt % 32));
    WordP = createAddLong(Builder, WordP, RoundV);

  unsigned SkipWords = Op.Frac / 32;
  Constant *ShiftAmt = ConstantInt::get(HvxWordTy, Op.Frac % 32);
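  // Example: Frac = 40 drops SkipWords = 1 whole 32-bit word and then shifts
  // the remaining words right by ShiftAmt = 8 bits, funneling in bits from
  // the next-higher word where one exists.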
  for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
    int Src = Dst + SkipWords;
    if (Src + 1 < End) {
  WordP.resize(WordP.size() - SkipWords);

  return HVC.joinVectorElements(Builder, WordP, Op.ResTy);

auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));
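  // Q15 background: a Q15 fixed-point product is (x * y) >> 15, or with
  // rounding (x * y + 0x4000) >> 15. V6_vmpyhvsrs computes exactly this
  // (multiply, shift, round, saturate) in one instruction.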
  auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
  return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),

auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  Type *InpTy = X.Val->getType();
  assert(InpTy == Y.Val->getType());
      HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});
  return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
                                {V1, X.Val, Y.Val});

auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
                               Value *CarryIn) const
    -> std::pair<Value *, Value *> {
  assert(X->getType() == Y->getType());
    if (CarryIn == nullptr)
      Args.push_back(CarryIn);
    Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
    return {Result, CarryOut};

  if (CarryIn != nullptr) {
    unsigned Width = VecTy->getScalarSizeInBits();
    for (unsigned i = 0, e = 32 / Width; i != e; ++i)
      Mask = (Mask << Width) | 1;
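    // The replicated-LSB mask pairs CarryIn with the element lanes: e.g.
    // Width = 16 gives Mask = 0x00010001, so vandqrt below materializes a
    // vector with 1 in each 16-bit lane whose carry predicate bit is set.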
        HVC.createHvxIntrinsic(Builder, V6_vandqrt, nullptr,
                               {CarryIn, HVC.getConstInt(Mask)});
    Result1 = Builder.CreateAdd(X, ValueIn, "add");
  return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};

auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
  std::tie(X, Y) = canonSgn(X, Y);
      HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
  return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));

auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
  Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), false);
    return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,
  Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), true);
  unsigned Len = HVC.length(HvxP16Ty) / 2;

  SmallVector<int, 128> PickOdd(Len);
  for (int i = 0; i != static_cast<int>(Len); ++i)
    PickOdd[i] = 2 * i + 1;
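  // PickOdd selects every odd halfword. The 32-bit products, reinterpreted
  // as halfword pairs on little-endian Hexagon, keep their upper 16 bits in
  // the odd lanes, which is exactly the "high half" result wanted here.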
      HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");

auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
    -> std::pair<Value *, Value *> {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType() == HvxI32Ty);
  std::tie(X, Y) = canonSgn(X, Y);
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;

  Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
                                        {X.Val, Y.Val}, {HvxI32Ty});

  assert(WordX.size() == WordY.size());
  unsigned Idx = 0, Length = WordX.size();
    if (HVC.isZero(WordX[Idx]))
      Sum[Idx] = WordY[Idx];
    else if (HVC.isZero(WordY[Idx]))
      Sum[Idx] = WordX[Idx];

  Value *Carry = nullptr;
  for (; Idx != Length; ++Idx) {
    std::tie(Sum[Idx], Carry) =
        createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);

  for (int i = 0, e = WordX.size(); i != e; ++i) {
    for (int j = 0, f = WordY.size(); j != f; ++j) {
      Signedness SX = (i + 1 == e) ? SgnX : Unsigned;
      auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});
      Products[i + j + 0].push_back(Lo);
      Products[i + j + 1].push_back(Hi);

  for (int i = 0, e = Products.size(); i != e; ++i) {
    while (Products[i].size() > 1) {
      Value *Carry = nullptr;
      for (int j = i; j != e; ++j) {
        auto &ProdJ = Products[j];
        auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
                                              pop_back_or_zero(ProdJ), Carry);
        ProdJ.insert(ProdJ.begin(), Sum);

  for (auto &P : Products) {
    assert(P.size() == 1 && "Should have been added together");
auto HvxIdioms::run() -> bool {
  for (BasicBlock &B : HVC.F) {
    for (auto It = B.rbegin(); It != B.rend(); ++It) {
      if (auto Fxm = matchFxpMul(*It)) {
        Value *New = processFxpMul(*It, *Fxm);
        It->replaceAllUsesWith(New);
        It = StartOver ? B.rbegin()
      } else if (matchGather(*It)) {
        It->eraseFromParent();
      } else if (matchScatter(*It)) {
        It->eraseFromParent();
      } else if (matchMLoad(*It)) {
      } else if (matchMStore(*It)) {

auto HexagonVectorCombine::run() -> bool {
    dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();

  if (HST.useHVXOps()) {
    Changed |= AlignVectors(*this).run();
    Changed |= HvxIdioms(*this).run();

    dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")
           << " after HexagonVectorCombine\n"

auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {

auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
  IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
  return VectorType::get(ByteTy, ElemCount, false);

auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
  IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
  return VectorType::get(BoolTy, ElemCount, false);

auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const

auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
    return C->isZeroValue();

auto HexagonVectorCombine::getIntValue(const Value *Val) const
    -> std::optional<APInt> {
    return CI->getValue();
  return std::nullopt;

auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {

auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {

auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {
auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
         "Invalid HVX element type");
  unsigned HwLen = HST.getVectorLength();
  return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,

auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
  return getSizeOf(Val->getType(), Kind);

auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
  auto *NcTy = const_cast<Type *>(Ty);
    return DL.getTypeStoreSize(NcTy).getFixedValue();
    return DL.getTypeAllocSize(NcTy).getFixedValue();

auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
  if (HST.isTypeForHVX(Ty))
    return HST.getVectorLength();
  return DL.getABITypeAlign(Ty).value();

auto HexagonVectorCombine::length(Value *Val) const -> size_t {
  return length(Val->getType());

auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
  assert(VecTy && "Must be a vector type");
  return VecTy->getElementCount().getFixedValue();

auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
    SimplifyQuery Q(DL, &TLI, &DT, &AC, In);

auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,
                                   int Where) const -> Value * {
  assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
  int SrcLen = getSizeOf(Src);
  int DstLen = getSizeOf(Dst);

  Value *P2Src = vresize(Builder, Src, P2Len, Poison);
  Value *P2Dst = vresize(Builder, Dst, P2Len, Poison);

  for (int i = 0; i != P2Len; ++i) {
        (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;

  return vresize(Builder, P2Insert, DstLen, Poison);

auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  int VecLen = getSizeOf(Hi);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),

  if (HST.isTypeForHVX(Hi->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
                              Hi->getType(), {Hi, Lo, Amt});
        Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
    return vralignb(Builder, Lo, Hi, Sub);

auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  int VecLen = getSizeOf(Lo);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);

  if (HST.isTypeForHVX(Lo->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
                              Lo->getType(), {Hi, Lo, Amt});
        Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
    Type *Int64Ty = Type::getInt64Ty(F.getContext());

auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
  std::vector<Value *> Work[2];
  int ThisW = 0, OtherW = 1;
  Work[ThisW].assign(Vecs.begin(), Vecs.end());
  while (Work[ThisW].size() > 1) {
    SMask.resize(length(Ty) * 2);
    std::iota(SMask.begin(), SMask.end(), 0);

    Work[OtherW].clear();
    if (Work[ThisW].size() % 2 != 0)
    for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
          Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");
      Work[OtherW].push_back(Joined);

  SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
  std::iota(SMask.begin(), SMask.end(), 0);
3553auto HexagonVectorCombine::vresize(IRBuilderBase &Builder,
Value *Val,
3557 assert(ValTy->getElementType() == Pad->getType());
3559 int CurSize = length(ValTy);
3560 if (CurSize == NewSize)
3563 if (CurSize > NewSize)
3564 return getElementRange(Builder, Val, Val, 0, NewSize);
3566 SmallVector<int, 128> SMask(NewSize);
3567 std::iota(SMask.
begin(), SMask.
begin() + CurSize, 0);
3568 std::fill(SMask.
begin() + CurSize, SMask.
end(), CurSize);
3573auto HexagonVectorCombine::rescale(IRBuilderBase &Builder,
Value *Mask,
3580 Type *FromSTy = FromTy->getScalarType();
3581 Type *ToSTy = ToTy->getScalarType();
3582 if (FromSTy == ToSTy)
3585 int FromSize = getSizeOf(FromSTy);
3586 int ToSize = getSizeOf(ToSTy);
3587 assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
3590 int FromCount = length(MaskTy);
3591 int ToCount = (FromCount * FromSize) / ToSize;
3592 assert((FromCount * FromSize) % ToSize == 0);
3594 auto *FromITy =
getIntTy(FromSize * 8);
3595 auto *ToITy =
getIntTy(ToSize * 8);
3600 Mask, VectorType::get(FromITy, FromCount,
false),
"sxt");
3602 Ext, VectorType::get(ToITy, ToCount,
false),
"cst");
3604 Cast, VectorType::get(getBoolTy(), ToCount,
false),
"trn");
3608auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder,
Value *Val)
const
3611 if (ScalarTy == getBoolTy())
3614 Value *Bytes = vbytes(Builder, Val);
3616 return Builder.
CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)),
"trn");
3619 return Builder.
CreateTrunc(Bytes, getBoolTy(),
"trn");
3623auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder,
Value *Val)
const
3626 if (ScalarTy == getByteTy())
3629 if (ScalarTy != getBoolTy())
3630 return Builder.
CreateBitCast(Val, getByteTy(getSizeOf(Val)),
"cst");
3633 return Builder.
CreateSExt(Val, VectorType::get(getByteTy(), VecTy),
"sxt");
3634 return Builder.
CreateSExt(Val, getByteTy(),
"sxt");
auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,
                                     unsigned Start, unsigned Length) const
    -> Value * {
  return getElementRange(Builder, Val, /*Ignored*/ Val, Start, Length);
}
auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const
    -> Value * {
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, 0, Len / 2);
}
auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const
    -> Value * {
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, Len / 2, Len / 2);
}
auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,
                                 Value *Val1) const -> Value * {
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  SmallVector<int, 128> Mask(2 * Len);

  for (int i = 0; i != Len; ++i) {
    Mask[i] = 2 * i;           // Even
    Mask[i + Len] = 2 * i + 1; // Odd
  }
  return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
}
auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
                                  Value *Val1) const -> Value * {
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  SmallVector<int, 128> Mask(2 * Len);

  for (int i = 0; i != Len; ++i) {
    Mask[2 * i + 0] = i;       // Val0
    Mask[2 * i + 1] = i + Len; // Val1
  }
  return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
}
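// Mask shapes for the two permutations above (illustrative), with Len = 4,
// Val0 = {a0,a1,a2,a3}, Val1 = {b0,b1,b2,b3}:
//   vdeal:  mask {0,2,4,6,1,3,5,7} -> {a0,a2,b0,b2,a1,a3,b1,b3}
//           (all even lanes of the concatenation, then all odd lanes)
//   vshuff: mask {0,4,1,5,2,6,3,7} -> {a0,b0,a1,b1,a2,b2,a3,b3}
//           (interleave)
// Applying vdeal to the two halves of a vshuff result recovers the original
// pair, mirroring the HVX vdeal/vshuff instructions; splitVectorElements and
// joinVectorElements below rely on exactly this inverse relationship.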
auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
                                              Intrinsic::ID IntID, Type *RetTy,
                                              ArrayRef<Value *> Args,
                                              ArrayRef<Type *> ArgTys,
                                              ArrayRef<Value *> MDSources) const
    -> Value * {
  auto getCast = [&](IRBuilderBase &Builder, Value *Val,
                     Type *DestTy) -> Value * {
    Type *SrcTy = Val->getType();
    if (SrcTy == DestTy)
      return Val;

    // Non-HVX types should already match, so here the source must be an
    // HVX vector type (possibly a predicate).
    assert(HST.isTypeForHVX(SrcTy, /*IncludeBool=*/true));

    Type *BoolTy = Type::getInt1Ty(F.getContext());
    if (cast<VectorType>(SrcTy)->getElementType() != BoolTy)
      return Builder.CreateBitCast(Val, DestTy, "cst");

    // Predicate HVX vector.
    unsigned HwLen = HST.getVectorLength();
    Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
                                   : Intrinsic::hexagon_V6_pred_typecast_128B;
    return Builder.CreateIntrinsic(TC, {DestTy, Val->getType()}, {Val}, {},
                                   "cup");
  };

  Function *IntrFn =
      Intrinsic::getOrInsertDeclaration(F.getParent(), IntID, ArgTys);
  FunctionType *IntrTy = IntrFn->getFunctionType();

  SmallVector<Value *, 4> IntrArgs;
  for (int i = 0, e = Args.size(); i != e; ++i) {
    Value *A = Args[i];
    Type *T = IntrTy->getParamType(i);
    if (A->getType() != T) {
      IntrArgs.push_back(getCast(Builder, A, T));
    } else {
      IntrArgs.push_back(A);
    }
  }
  StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";
  CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);

  MemoryEffects ME = Call->getAttributes().getMemoryEffects();
  if (!ME.doesNotAccessMemory() && !ME.onlyAccessesInaccessibleMem())
    propagateMetadata(Call, MDSources);

  Type *CallTy = Call->getType();
  if (RetTy == nullptr || CallTy == RetTy)
    return Call;
  assert(HST.isTypeForHVX(CallTy, /*IncludeBool=*/true));
  return getCast(Builder, Call, RetTy);
}
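// Usage sketch (see vralignb above): passing the V6_valignb intrinsic id
// with RetTy = Lo->getType() bitcasts the byte-vector arguments to the
// intrinsic's native <16 x i32>/<32 x i32> HVX types, emits the call, and
// casts the result back to the requested type; predicate arguments go
// through hexagon_V6_pred_typecast instead of a plain bitcast.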
auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
                                               Value *Vec,
                                               unsigned ToWidth) const
    -> SmallVector<Value *> {
  // Break a vector of wide elements into a series of vectors with narrow
  // elements:
  //   (...c0:b0:a0, ...c1:b1:a1, ...c2:b2:a2, ...)
  // -->
  //   (a0, a1, a2, ...)    // lowest "ToWidth" bits
  //   (b0, b1, b2, ...)    // the next lowest...
  //   (c0, c1, c2, ...)    // ...
  // The number of elements in each resulting vector matches the original.
  auto *VecTy = cast<VectorType>(Vec->getType());
  assert(VecTy->getElementType()->isIntegerTy());
  unsigned FromWidth = VecTy->getScalarSizeInBits();
  assert(isPowerOf2_32(ToWidth) && isPowerOf2_32(FromWidth));
  assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
  unsigned NumResults = FromWidth / ToWidth;

  SmallVector<Value *> Results(NumResults);
  Results[0] = Vec;
  unsigned Length = length(VecTy);

  // Do it by splitting in half, since those operations correspond to deal
  // instructions.
  auto splitInHalf = [&](unsigned Begin, unsigned End, auto splitFunc) -> void {
    // Take V = Results[Begin], split it into L and H, store Results[Begin] = L
    // and Results[(Begin+End)/2] = H, then recurse on both halves.
    if (Begin + 1 == End)
      return;

    Value *Val = Results[Begin];
    unsigned Width = Val->getType()->getScalarSizeInBits();

    auto *VTy =
        VectorType::get(getIntTy(Width / 2), 2 * Length, /*Scalable=*/false);
    Value *VVal = Builder.CreateBitCast(Val, VTy, "cst");
    Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));

    unsigned Half = (Begin + End) / 2;
    Results[Begin] = sublo(Builder, Res);
    Results[Half] = subhi(Builder, Res);

    splitFunc(Begin, Half, splitFunc);
    splitFunc(Half, End, splitFunc);
  };

  splitInHalf(0, NumResults, splitInHalf);
  return Results;
}
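// For instance, splitting a <64 x i32> vector down to ToWidth = 8 produces
// NumResults = 4 vectors of <64 x i8>: the first deal separates each 32-bit
// lane into its low and high 16-bit halves, and the second level of the
// recursion splits those into the four byte planes.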
auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
                                              ArrayRef<Value *> Values,
                                              VectorType *ToType) const
    -> Value * {
  assert(ToType->getElementType()->isIntegerTy());

  // The inputs are joined in pairs, so pad the list with copies of the sign
  // bit until its size is a power of 2; pairwise joins keep the shuffles
  // simple enough to hopefully be folded into perfect-shuffle instructions,
  // and the output would need to be sign-extended to a power-of-2 element
  // width anyway.
  SmallVector<Value *> Inputs(Values.begin(), Values.end());

  unsigned ToWidth = ToType->getScalarSizeInBits();
  unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
  assert(Width <= ToWidth);
  assert(isPowerOf2_32(Width) && isPowerOf2_32(ToWidth));
  unsigned Length = length(Inputs.front()->getType());

  unsigned NeedInputs = ToWidth / Width;
  if (Inputs.size() != NeedInputs) {
    // Having too many inputs is ok: drop the high bits (usual wrap-around).
    // If there are too few, fill them with the sign bit.
    Value *Last = Inputs.back();
    Value *Sign = Builder.CreateAShr(
        Last, ConstantInt::get(Last->getType(), Width - 1), "asr");
    Inputs.resize(NeedInputs, Sign);
  }

  while (Inputs.size() > 1) {
    Width *= 2;
    auto *VTy = VectorType::get(getIntTy(Width), Length, /*Scalable=*/false);
    for (int i = 0, e = Inputs.size(); i < e; i += 2) {
      Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
      Inputs[i / 2] = Builder.CreateBitCast(Res, VTy, "cst");
    }
    Inputs.resize(Inputs.size() / 2);
  }

  assert(Inputs.front()->getType() == ToType);
  return Inputs.front();
}
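// Example: joining two <64 x i8> inputs into <64 x i16> takes one round of
// the loop: vshuff interleaves the inputs into {lo0,hi0,lo1,hi1,...} and the
// bitcast reassembles little-endian 16-bit lanes, so lane i becomes
// (Inputs[1][i] << 8) | Inputs[0][i]. With fewer inputs than needed, the
// missing high parts are filled with the sign of the last input.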
auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
                                                      Value *Ptr1) const
    -> std::optional<int> {
  // Try SCEV first.
  const SCEV *Scev0 = SE.getSCEV(Ptr0);
  const SCEV *Scev1 = SE.getSCEV(Ptr1);
  const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
  if (const auto *Const = dyn_cast<SCEVConstant>(ScevDiff)) {
    APInt V = Const->getAPInt();
    if (V.isSignedIntN(8 * sizeof(int)))
      return static_cast<int>(V.getSExtValue());
  }

  // Helper builder that erases all temporary instructions it created once
  // the analysis is done.
  struct Builder : IRBuilder<> {
    Builder(BasicBlock *B) : IRBuilder<>(B->getTerminator()) {}
    ~Builder() {
      for (Instruction *I : llvm::reverse(ToErase))
        I->eraseFromParent();
    }
    SmallVector<Instruction *, 8> ToErase;
  };

#define CallBuilder(B, F)                                                      \
  [&](auto &B_) {                                                              \
    Value *V = B_.F;                                                           \
    if (auto *I = dyn_cast<Instruction>(V))                                    \
      B_.ToErase.push_back(I);                                                 \
    return V;                                                                  \
  }(B)

  auto Simplify = [this](Value *V) {
    if (auto *I = dyn_cast<Instruction>(V)) {
      SimplifyQuery Q(DL, &TLI, &DT, &AC, I);
      if (Value *S = simplifyInstruction(I, Q))
        return S;
    }
    return V;
  };

  auto StripBitCast = [](Value *V) {
    while (auto *C = dyn_cast<BitCastInst>(V))
      V = C->getOperand(0);
    return V;
  };

  Ptr0 = StripBitCast(Ptr0);
  Ptr1 = StripBitCast(Ptr1);
  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
    return std::nullopt;

  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
    return std::nullopt;
  if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
    return std::nullopt;

  Builder B(Gep0->getParent());
  int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);

  // FIXME: for now only check GEPs with a single index.
  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
    return std::nullopt;

  Value *Idx0 = Gep0->getOperand(1);
  Value *Idx1 = Gep1->getOperand(1);

  // First, try to simplify the subtraction directly.
  if (auto *Diff = dyn_cast<ConstantInt>(
          Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
    return Diff->getSExtValue() * Scale;

  KnownBits Known0 = getKnownBits(Idx0, Gep0);
  KnownBits Known1 = getKnownBits(Idx1, Gep1);
  APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
  if (Unknown.isAllOnes())
    return std::nullopt;

  // Split each index into its unknown-bit and known-bit portions, and try
  // to fold the two partial differences separately.
  Value *MaskU = ConstantInt::get(F.getContext(), Unknown);
  Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
  Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
  Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
  int Diff0 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubU))
    Diff0 = C->getSExtValue();
  else
    return std::nullopt;

  Value *MaskK = ConstantInt::get(F.getContext(), ~Unknown);
  Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
  Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
  Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
  int Diff1 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubK))
    Diff1 = C->getSExtValue();
  else
    return std::nullopt;

  return (Diff0 + Diff1) * Scale;
}
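// Fallback logic above, illustrated: the byte distance between
//   %p0 = getelementptr i32, ptr %base, i32 %i
//   %p1 = getelementptr i32, ptr %base, i32 %j
// is (%i - %j) * 4. When the subtraction does not fold outright, %i and %j
// are split by the Unknown mask into unknown-bit and known-bit portions and
// the two partial differences are tried separately; if both simplify to
// constants (the unknown portions typically cancel via x - x == 0 when both
// indices share them), their sum scaled by Scale is the distance.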
auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
                                                 const Instruction *CtxI) const
    -> unsigned {
  return ComputeMaxSignificantBits(V, DL, &AC, CtxI, &DT);
}

auto HexagonVectorCombine::getKnownBits(const Value *V,
                                        const Instruction *CtxI) const
    -> KnownBits {
  return computeKnownBits(V, DL, &AC, CtxI, &DT);
}
auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {
  if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||
      In.isFenceLike() || In.mayReadOrWriteMemory()) {
    return false;
  }
  return true;
}
template <typename T>
auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
                                                  BasicBlock::const_iterator To,
                                                  const T &IgnoreInsts) const
    -> bool {
  auto getLocOrNone =
      [this](const Instruction &I) -> std::optional<MemoryLocation> {
    if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::masked_load:
        return MemoryLocation::getForArgument(II, 0, TLI);
      case Intrinsic::masked_store:
        return MemoryLocation::getForArgument(II, 1, TLI);
      }
    }
    return MemoryLocation::getOrNone(&I);
  };

  // The source and the destination must be in the same basic block.
  const BasicBlock &Block = *In.getParent();
  assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
  // No PHIs.
  if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
    return false;

  if (!mayHaveNonDefUseDependency(In))
    return true;
  bool MayWrite = In.mayWriteToMemory();
  auto MaybeLoc = getLocOrNone(In);

  auto From = In.getIterator();
  if (From == To)
    return true;
  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
  auto Range =
      MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
  for (auto It = Range.first; It != Range.second; ++It) {
    const Instruction &I = *It;
    if (llvm::is_contained(IgnoreInsts, &I))
      continue;
    // The assume intrinsic can be ignored.
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::assume)
        continue;
    }
    if (I.mayThrow())
      return false;
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (!CB->hasFnAttr(Attribute::WillReturn))
        return false;
      if (!CB->hasFnAttr(Attribute::NoSync))
        return false;
    }
    if (I.mayReadOrWriteMemory()) {
      auto MaybeLocI = getLocOrNone(I);
      if (MayWrite || I.mayWriteToMemory()) {
        if (!MaybeLoc || !MaybeLocI)
          return false;
        if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
          return false;
      }
    }
  }
  return true;
}
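// The scan direction above matters: when hoisting (To precedes In), the
// range [To, From) covers exactly the instructions In would jump over; when
// sinking, it is (From, To). Any may-throw instruction, any call lacking
// willreturn/nosync, or any potentially aliasing memory access inside that
// range blocks the move.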
auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return VecTy->getElementType() == getByteTy();
  return false;
}
auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,
                                           Value *Hi, int Start,
                                           int Length) const -> Value * {
  assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));
  SmallVector<int, 128> SMask(Length);
  std::iota(SMask.begin(), SMask.end(), Start);
  return Builder.CreateShuffleVector(Lo, Hi, SMask, "shf");
}
namespace {
class HexagonVectorCombineLegacy : public FunctionPass {
public:
  static char ID;

  HexagonVectorCombineLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override { return "Hexagon Vector Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
    HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);
    return HVC.run();
  }
};
} // namespace

char HexagonVectorCombineLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
                      "Hexagon Vector Combine", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
                    "Hexagon Vector Combine", false, false)

FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
  return new HexagonVectorCombineLegacy();
}
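// Note: runOnFunction pulls its HexagonTargetMachine through TargetPassConfig,
// so this legacy pass is intended to run inside the Hexagon codegen pipeline
// rather than a bare opt-style pass manager. A minimal sketch of how it is
// hooked up in-tree (assuming the usual PassConfig plumbing in
// HexagonTargetMachine.cpp):
//
//   void HexagonPassConfig::addIRPasses() {
//     // ... other IR-level passes ...
//     addPass(createHexagonVectorCombineLegacyPass());
//   }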