#include "llvm/IR/IntrinsicsHexagon.h"

#define DEBUG_TYPE "hexagon-vc"
#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072
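// Overview (a summary inferred from the implementations below): this pass
// hosts two IR-level rewrites. AlignVectors groups adjacent, related vector
// loads/stores and realigns them to HVX sector boundaries; HvxIdioms
// recognizes fixed-point multiply and gather/scatter idioms and lowers them
// to HVX intrinsics.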
class HexagonVectorCombine {
public:
  // ... (constructor)
      : F(F_), DL(F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
  // ...

  Type *getByteTy(int ElemCount = 0) const;
  Type *getBoolTy(int ElemCount = 0) const;
  // ...
  std::optional<APInt> getIntValue(const Value *Val) const;
  // ...
  bool isTrue(const Value *Val) const;
  bool isFalse(const Value *Val) const;
  // ...
  int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
  int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
  int getTypeAlignment(Type *Ty) const;
  size_t length(Value *Val) const;
  size_t length(Type *Ty) const;
  // ...
  Value *insertb(IRBuilderBase &Builder, Value *Dst, Value *Src, int Start,
                 int Length, int Where) const;
  // ...
  SmallVector<Value *> splitVectorElements(IRBuilderBase &Builder, Value *Vec,
                                           unsigned ToWidth) const;
  // ...
  std::optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;
  // ...
  unsigned getNumSignificantBits(const Value *V,
                                 const Instruction *CtxI) const;
  // ...
  template <typename T = std::vector<Instruction *>>
  bool isSafeToMoveBeforeInBB(const Instruction &In,
                              BasicBlock::const_iterator To,
                              const T &IgnoreInsts = {}) const;
  // ...
  [[maybe_unused]] bool isByteVecTy(Type *Ty) const;
  // ...
  Value *getElementRange(IRBuilderBase &Builder, Value *Lo, Value *Hi,
                         int Start, int Length) const;
};

class AlignVectors {
public:
  AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}
  // ...
  using InstList = std::vector<Instruction *>;
  // ...
  struct AddrInfo {
    AddrInfo(const AddrInfo &) = default;
    AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A,
             Type *T, Align H)
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}
    AddrInfo &operator=(const AddrInfo &) = default;
    // ...
  };
  using AddrList = std::vector<AddrInfo>;

  struct InstrLess {
    bool operator()(const Instruction *A, const Instruction *B) const {
      return A->comesBefore(B);
    }
  };
  using DepList = std::set<Instruction *, InstrLess>;

  struct MoveGroup {
    MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
        : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
    MoveGroup() = default;
    // ...
  };
  using MoveList = std::vector<MoveGroup>;

  struct ByteSpan {
    struct Segment {
      Segment(Value *Val, int Begin, int Len)
          : Val(Val), Start(Begin), Size(Len) {}
      Segment(const Segment &Seg) = default;
      Segment &operator=(const Segment &Seg) = default;
      // ...
    };

    struct Block {
      Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
      Block(Value *Val, int Off, int Len, int Pos)
          : Seg(Val, Off, Len), Pos(Pos) {}
      // ...
    };
    // ...
    ByteSpan section(int Start, int Length) const;
    ByteSpan &shift(int Offset);
    // ...
    int size() const { return Blocks.size(); }
    Block &operator[](int i) { return Blocks[i]; }
    const Block &operator[](int i) const { return Blocks[i]; }

    std::vector<Block> Blocks;
    // ...
    iterator begin() { return Blocks.begin(); }
    iterator end() { return Blocks.end(); }
  };
  std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
  bool isHvx(const AddrInfo &AI) const;
  // ...
  [[maybe_unused]] bool isSectorTy(Type *Ty) const;
  // ...
  Value *createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
                               int Adjust,
                               const InstMap &CloneMap = InstMap()) const;
  Value *createAlignedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
                              int Alignment,
                              const InstMap &CloneMap = InstMap()) const;
  // ...
  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool moveTogether(MoveGroup &Move) const;
  template <typename T>
  // ... (cloneBefore)
  void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int ScLen, Value *AlignVal, Value *AlignAddr) const;
  void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                         int ScLen, Value *AlignVal, Value *AlignAddr) const;
  bool realignGroup(const MoveGroup &Move) const;
  // ...
  Value *makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
                             int Alignment) const;
  // ...
  std::map<Instruction *, AddrList> AddrGroups;
  const HexagonVectorCombine &HVC;
};

raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
  OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
  OS << "Addr: " << *AI.Addr << '\n';
  OS << "Type: " << *AI.ValTy << '\n';
  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
  OS << "Offset: " << AI.Offset;
  // ...
}

raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
  OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
  OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
  // ...
    OS << "  " << *I << '\n';
  // ...
    OS << "  " << *I << '\n';
  for (auto [K, V] : MG.Clones) {
    // ...
    K->printAsOperand(OS, false);
    OS << "\t-> " << *V << '\n';
  }
  // ...
}

raw_ostream &operator<<(raw_ostream &OS,
                        const AlignVectors::ByteSpan::Block &B) {
  OS << "  @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
  if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {
    OS << "(self:" << B.Seg.Val << ')';
  } else if (B.Seg.Val != nullptr) {
    // ...
  }
  // ...
}

raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
  for (const AlignVectors::ByteSpan::Block &B : BS)
  // ...
}
class HvxIdioms {
public:
  HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
    auto *Int32Ty = HVC.getIntTy(32);
    HvxI32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/false);
    HvxP32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/true);
  }
  // ...
  std::optional<unsigned> RoundAt;
  // ...
  auto getNumSignificantBits(Value *V, Instruction *In) const
      -> std::pair<unsigned, Signedness>;
  auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;
  // ...
  auto matchFxpMul(Instruction &In) const -> std::optional<FxpOp>;
  // ...
  auto processFxpMul(Instruction &In, const FxpOp &Op) const -> Value *;
  // ...
  auto createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
                    bool Rounding) const -> Value *;
  auto createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
                    bool Rounding) const -> Value *;
  // ...
  auto createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
                      Value *CarryIn = nullptr) const
      -> std::pair<Value *, Value *>;
  // ...
  auto createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
      -> std::pair<Value *, Value *>;
  // ...
  const HexagonVectorCombine &HVC;
};

raw_ostream &operator<<(raw_ostream &OS, const HvxIdioms::FxpOp &Op) {
  static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
  // ...
  if (Op.RoundAt.has_value()) {
    if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
      // ...
      OS << " + 1<<" << *Op.RoundAt;
    }
  }
  OS << "\n X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
     << " Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;
  // ...
}
template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
}
// ...

#if !defined(_MSC_VER) || _MSC_VER >= 1926
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
#else
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
#endif
{
  for (auto i = map.begin(), e = map.end(); i != e;) {
    // ...
  }
}

template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
  // ...
}
auto AlignVectors::ByteSpan::extent() const -> int {
  // ...
  int Min = Blocks[0].Pos;
  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
  for (int i = 1, e = size(); i != e; ++i) {
    Min = std::min(Min, Blocks[i].Pos);
    Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
  }
  // ...
}

auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  // ...
  for (const ByteSpan::Block &B : Blocks) {
    int L = std::max(B.Pos, Start);
    int R = std::min(B.Pos + B.Seg.Size, Start + Length);
    // ...
    int Off = L > B.Pos ? L - B.Pos : 0;
    Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
  }
  // ...
}

auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
  // ...
}

// ... (ByteSpan::values)
  SmallVector<Value *, 8> Values(Blocks.size());
  for (int i = 0, e = Blocks.size(); i != e; ++i)
    Values[i] = Blocks[i].Seg.Val;
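// Illustrative example (values chosen for exposition): for a span with
// blocks at Pos = 0..63 and Pos = 64..127, extent() returns 128, and
// section(32, 64) yields the sub-span covering bytes 32..95, clipping each
// block to the window and adjusting its Seg.Start by the clipped prefix.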
auto AlignVectors::getAddrInfo(Instruction &In) const
    -> std::optional<AddrInfo> {
  // ...
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
  // ...
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
  // ...
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      II->getParamAlign(0).valueOrOne());
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      II->getParamAlign(1).valueOrOne());
  // ...
}

auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
  // ...
}

auto AlignVectors::getPayload(Value *Val) const -> Value * {
  // ...
      ID = II->getIntrinsicID();
  // ...
      return In->getOperand(0);
  // ...
}

auto AlignVectors::getMask(Value *Val) const -> Value * {
  // ...
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(1);
    case Intrinsic::masked_store:
      return II->getArgOperand(2);
    }
  // ...
  Type *ValTy = getPayload(Val)->getType();
  // ...
}

auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
  // ...
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getArgOperand(2);
  // ...
}

auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
                                         Type *ValTy, int Adjust,
                                         const InstMap &CloneMap) const
  // ...
    if (Instruction *New = CloneMap.lookup(I))
  // ...
  return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust), "gep");
}
auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
                                        Type *ValTy, int Alignment,
                                        const InstMap &CloneMap) const
  // ...
      for (auto [Old, New] : CloneMap)
        I->replaceUsesOfWith(Old, New);
  // ...
  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
  Value *Mask = HVC.getConstInt(-Alignment);
  Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
  return Builder.CreateIntToPtr(
  // ...
}
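// The masking above rounds the pointer down to the requested power-of-two
// alignment: in two's complement, -Alignment has all bits set except the low
// log2(Alignment) bits, so e.g. with Alignment = 128 a pointer value of
// 0x1234 becomes 0x1200 after the AND.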
auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
  // ...
         "Expecting scalar predicate");
  if (HVC.isFalse(Predicate))
  // ...
  if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {
    Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
                                       Alignment, MDSources);
    return Builder.CreateSelect(Mask, Load, PassThru);
  }
  // ...
  assert(!HVC.isUndef(Mask));
  if (HVC.isZero(Mask))
  // ...
  if (HVC.isTrue(Mask))
    return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
  // ...
      Mask, PassThru, "mld");
  // ...
}

auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,
                                    Value *Ptr, int Alignment,
  // ...
      Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");
  // ...
}

auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,
  // ...
         "Predicated 'scalar' vector loads not yet supported");
  // ...
  assert(!Predicate->getType()->isVectorTy() && "Expecting scalar predicate");
  assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
  if (HVC.isFalse(Predicate))
  // ...
  if (HVC.isTrue(Predicate))
    return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
  // ...
  auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
  // ...
  return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
                                {Predicate, Ptr, HVC.getConstInt(0)}, {},
  // ...
}

auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
  // ...
  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
  // ...
          "Expecting scalar predicate"));
  // ...
  if (HVC.isFalse(Predicate))
  // ...
  if (HVC.isTrue(Predicate))
  // ...
  if (HVC.isTrue(Mask)) {
    // ...
      return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
    // ...
    return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
  }
  // ...
      Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
  // ...
  Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
                                         Predicate, Alignment, MDSources);
  Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
  return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,
  // ...
}

auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,
                                     Value *Ptr, int Alignment,
  // ...

auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,
  // ...
         "Predicated 'scalar' vector stores not yet supported");
  // ...
  if (HVC.isFalse(Predicate))
  // ...
  if (HVC.isTrue(Predicate))
    return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
  // ...
  assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
  auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
  // ...
  return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,
                                {Predicate, Ptr, HVC.getConstInt(0), Val}, {},
  // ...
}

auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
  // ...
         "Base and In should be in the same block");
  assert(Base->comesBefore(In) && "Base should come before In");
  // ...
  std::deque<Instruction *> WorkQ = {In};
  while (!WorkQ.empty()) {
    // ...
      if (I->getParent() == Parent && Base->comesBefore(I))
    // ...
  }
  // ...
}
auto AlignVectors::createAddressGroups() -> bool {
  // ...
  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    }
    return std::make_pair(nullptr, 0);
  };

  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
    // ...
    for (Instruction &I : Block) {
      auto AI = this->getAddrInfo(I);
      // ...
      auto F = findBaseAndOffset(*AI);
      // ...
      if (Instruction *BI = F.first) {
        AI->Offset = F.second;
        // ...
      } else {
        WorkStack.push_back(*AI);
        GroupInst = AI->Inst;
      }
      AddrGroups[GroupInst].push_back(*AI);
    }
    // ...
    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();
  };

  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());
  // ...
  erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
  // ...
      G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
  // ...
  return !AddrGroups.empty();
}
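// Grouping strategy, in short: each group is keyed by the first instruction
// whose address could not be expressed as "some earlier address plus a
// compile-time byte offset"; every later access whose pointer differs from
// the key's by such a constant joins that group. Groups of size one are
// dropped above, since there is nothing to realign jointly.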
auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
  // ...
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // ...
    if (Move.IsHvx != isHvx(Info))
    // ...
    if (Base->getParent() != Info.Inst->getParent())
    // ...
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))
    // ...
      return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&
             HVC.isSafeToClone(*I);
    // ...
    DepList Deps = getUpwardDeps(Info.Inst, Base);
    // ...
    Move.Main.push_back(Info.Inst);
    // ...
  };

  // ...
  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())
    // ...
    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
  }
  // ...
  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  // ...
  erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });
  // ...
}

auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  // ...
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // ...
           "Not handling stores with return values");
    // ...
    if (Move.IsHvx != isHvx(Info))
    // ...
    if (Base->getParent() != Info.Inst->getParent())
    // ...
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(),
                                    Move.Main))
    // ...
    Move.Main.push_back(Info.Inst);
    // ...
  };

  MoveList StoreGroups;

  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())
    // ...
    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
  }
  // ...
  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  // ...
  erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });
  // ...
  if (!VADoFullStores) {
    erase_if(StoreGroups, [this](const MoveGroup &G) {
      // ...
        auto MaybeInfo = this->getAddrInfo(*S);
        assert(MaybeInfo.has_value());
        return HVC.HST.isHVXVectorType(
            EVT::getEVT(MaybeInfo->ValTy, /*HandleUnknown=*/false));
      // ...
    });
  }
  // ...
}
auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {
  // ...
  assert(!Move.Main.empty() && "Move group should have non-empty Main");
  // ...
    Move.Clones = cloneBefore(Where->getIterator(), Move.Deps);
    // ...
    for (Instruction *M : Main) {
      // ...
        M->moveAfter(Where);
      for (auto [Old, New] : Move.Clones)
        M->replaceUsesOfWith(Old, New);
      // ...
    }
    // ...
    for (int i = 0, e = Move.Deps.size(); i != e; ++i)
      Move.Deps[i] = Move.Clones[Move.Deps[i]];
  // ...
    assert(Move.Deps.empty());
    // ...
    for (Instruction *M : Main.drop_front(1)) {
    // ...
    }
  // ...
  return Move.Main.size() + Move.Deps.size() > 1;
}

template <typename T>
auto AlignVectors::cloneBefore(BasicBlock::iterator To, T &&Insts) const
  // ...
  for (Instruction *I : Insts) {
    assert(HVC.isSafeToClone(*I));
    // ...
    C->setName(Twine("c.") + I->getName() + ".");
    C->insertBefore(To);
    // ...
    for (auto [Old, New] : Map)
      C->replaceUsesOfWith(Old, New);
    Map.insert(std::make_pair(I, C));
  }
  // ...
auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
                                    const ByteSpan &VSpan, int ScLen,
  // ...
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);
  // ...
  BasicBlock *BaseBlock = Builder.GetInsertBlock();
  // ...
  ByteSpan ASpan;
  for (int Index = 0; Index != NumSectors; ++Index)
    ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan.Blocks[Index].Seg.Val =
        reinterpret_cast<Value *>(&ASpan.Blocks[Index]);
  }
  // ...
  DenseMap<void *, Instruction *> EarliestUser;
  // ...
  auto isEarlier = [](Instruction *A, Instruction *B) {
    // ...
    assert(A->getParent() == B->getParent());
    return A->comesBefore(B);
  };

  auto earliestUser = [&](const auto &Uses) {
    // ...
    for (const Use &U : Uses) {
      // ...
      assert(I != nullptr && "Load used in a non-instruction?");
      // ...
      if (I->getParent() == BaseBlock) {
        // ...
        User = std::min(User, I, isEarlier);
      }
      // ...
    }
    // ...
  };

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
    for (const ByteSpan::Block &S : ASection) {
      auto &EU = EarliestUser[S.Seg.Val];
      EU = std::min(EU, earliestUser(B.Seg.Val->uses()), isEarlier);
    }
  }

  LLVM_DEBUG({
    dbgs() << "ASpan:\n" << ASpan << '\n';
    dbgs() << "Earliest users of ASpan:\n";
    for (auto &[Val, User] : EarliestUser) {
      dbgs() << Val << "\n ->" << *User << '\n';
    }
  });

  auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int Index, bool MakePred) {
    // ...
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
    // ...
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
    // ...
    int Width = (1 + DoAlign) * ScLen;
    return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,
                            VSpan.section(Start, Width).values());
  };

  auto moveBefore = [this](BasicBlock::iterator In, BasicBlock::iterator To) {
    assert(In->getParent() == To->getParent());
    DepList Deps = getUpwardDeps(&*In, &*To);
    // ...
    InstMap Map = cloneBefore(In, Deps);
    for (auto [Old, New] : Map)
      In->replaceUsesOfWith(Old, New);
    // ...
  };

  for (int Index = 0; Index != NumSectors + 1; ++Index) {
    // ...
        DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
    // ...
        Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
    if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
      // ...
          createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
      // ...
      if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
        moveBefore(Load->getIterator(), BasePos);
      // ...
      LLVM_DEBUG(dbgs() << "Loads[" << Index << "]:" << *Loads[Index] << '\n');
    }
  }
  // ...
  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan[Index].Seg.Val = nullptr;
    if (auto *Where = EarliestUser[&ASpan[Index]]) {
      // ...
      assert(NextLoad != nullptr);
      Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
      // ...
      ASpan[Index].Seg.Val = Val;
    }
  }
  // ...
  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
    // ...
    std::vector<ByteSpan::Block *> ABlocks;
    for (ByteSpan::Block &S : ASection) {
      if (S.Seg.Val != nullptr)
        ABlocks.push_back(&S);
    }
    // ...
              [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {
    // ...
    for (ByteSpan::Block *S : ABlocks) {
      // ...
      Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
      // ...
          HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);
    }
    // ...
    Type *ValTy = getPayload(B.Seg.Val)->getType();
    // ...
        getPassThrough(B.Seg.Val), "sel");
    // ...
  }
}
auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
                                     const ByteSpan &VSpan, int ScLen,
  // ...
  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);
  // ...
  ByteSpan ASpanV, ASpanM;
  // ...
  auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {
    // ...
    auto *VecTy = VectorType::get(Ty, 1, /*Scalable=*/false);
    // ...
  };

  for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign;
       ++Index) {
    // ...
        VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
    // ...
    for (ByteSpan::Block &S : VSection) {
      Value *Pay = getPayload(S.Seg.Val);
      // ...
          Pay->getType(), HVC.getByteTy());
      Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
                                 S.Seg.Start, S.Seg.Size, S.Pos);
      AccumM = Builder.CreateOr(AccumM, PartM);

      Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
                                 S.Seg.Start, S.Seg.Size, S.Pos);
      // ...
    }
    ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
    ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);
  }

  LLVM_DEBUG({
    dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';
  });

  // ...
  for (int Index = 1; Index != NumSectors + 2; ++Index) {
    Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
    Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
    // ...
    ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
    ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
  }

  LLVM_DEBUG({
    dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';
  });

  auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
                         const ByteSpan &ASpanM, int Index, bool MakePred) {
    // ...
    if (HVC.isUndef(Val) || HVC.isZero(Mask))
    // ...
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
    // ...
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
    // ...
    int Width = (1 + DoAlign) * ScLen;
    this->createStore(Builder, Val, Ptr, Predicate, ScLen,
                      HVC.vlsb(Builder, Mask),
                      VSpan.section(Start, Width).values());
  };

  for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {
    createStore(Builder, ASpanV, ASpanM, Index,
                DoAlign && Index == NumSectors);
  }
}
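// Store realignment mirrors the load case, but shifts the other way:
// vlalignb moves the unaligned data left into aligned sector positions,
// while a parallel byte mask (ASpanM) tracks which bytes of each aligned
// sector are actually written, so the final stores can be masked and leave
// all other bytes in memory untouched.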
auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
  // ...
  auto getMaxOf = [](auto Range, auto GetValue) {
    // ...
      return GetValue(A) < GetValue(B);
    // ...
  };

  const AddrList &BaseInfos = AddrGroups.at(Move.Base);
  // ...
  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
  // ...
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  // ...
  const AddrInfo &WithMaxAlign =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;

  // ...
  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });
  // ...
  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;
  // ...
                                    InstSimplifyFolder(HVC.DL));
  Value *AlignAddr = nullptr;
  Value *AlignVal = nullptr;

  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    // ...
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust, Move.Clones);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    // ...
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
  } else {
    // ...
        createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
                             MinNeeded.value(), Move.Clones);
    // ...
        Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");
    // ...
      for (auto [Old, New] : Move.Clones)
        I->replaceUsesOfWith(Old, New);
    // ...
  }

  // ...
  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);
  }
  // ...
                 : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

  LLVM_DEBUG({
    dbgs() << "ScLen: " << ScLen << "\n";
    dbgs() << "AlignVal:" << *AlignVal << "\n";
    dbgs() << "AlignAddr:" << *AlignAddr << "\n";
    dbgs() << "VSpan:\n" << VSpan << '\n';
  });

  // ...
    realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
  // ...
    realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);

  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();
  // ...
}
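// AlignAddr/AlignVal selection, in short: if some access in the group is
// already sufficiently aligned (MinNeeded <= MaxGiven), its address is used
// as the anchor and the residual byte distance becomes a constant AlignVal;
// otherwise an address is masked down to the needed alignment and AlignVal
// becomes a runtime quantity derived from the original pointer.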
auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
                                       int Alignment) const -> Value * {
  auto *AlignTy = AlignVal->getType();
  // ...
      AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");
  Value *Zero = ConstantInt::get(AlignTy, 0);
  // ...
}

auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
  if (!HVC.isByteVecTy(Ty))
  // ...
  int Size = HVC.getSizeOf(Ty);
  // ...
}
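// makeTestIfUnaligned computes "AlignVal & (Alignment - 1) != 0", i.e. it is
// true exactly when the address is not a multiple of the power-of-two sector
// size. For example, with Alignment = 128 and AlignVal = 0x1234, the AND
// yields 0x34 != 0, so the unaligned path is taken.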
auto AlignVectors::run() -> bool {
  // ...
  if (!createAddressGroups())
  // ...
  LLVM_DEBUG({
    dbgs() << "Address groups(" << AddrGroups.size() << "):\n";
    for (auto &[In, AL] : AddrGroups) {
      for (const AddrInfo &AI : AL)
        dbgs() << "---\n" << AI << '\n';
    }
  });

  MoveList LoadGroups, StoreGroups;

  for (auto &G : AddrGroups) {
    // ...
  }

  LLVM_DEBUG({
    dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";
    for (const MoveGroup &G : LoadGroups)
      dbgs() << G << "\n";
    dbgs() << "Store groups(" << StoreGroups.size() << "):\n";
    for (const MoveGroup &G : StoreGroups)
      dbgs() << G << "\n";
  });

  // ...
  unsigned CountLimit = VAGroupCountLimit;
  if (CountLimit == 0)
  // ...
  if (LoadGroups.size() > CountLimit) {
    LoadGroups.resize(CountLimit);
    StoreGroups.clear();
  } else {
    unsigned StoreLimit = CountLimit - LoadGroups.size();
    if (StoreGroups.size() > StoreLimit)
      StoreGroups.resize(StoreLimit);
  }

  for (auto &M : LoadGroups)
  // ...
  for (auto &M : StoreGroups)
  // ...
  for (auto &M : LoadGroups)
  // ...
  for (auto &M : StoreGroups)
  // ...
}
auto HvxIdioms::getNumSignificantBits(Value *V, Instruction *In) const
    -> std::pair<unsigned, Signedness> {
  unsigned Bits = HVC.getNumSignificantBits(V, In);
  // ...
  KnownBits Known = HVC.getKnownBits(V, In);
  Signedness Sign = Signed;
  unsigned NumToTest = 0;
  // ...
    NumToTest = Bits - 1;
  // ...
  return {Bits, Sign};
}

auto HvxIdioms::canonSgn(SValue X, SValue Y) const
    -> std::pair<SValue, SValue> {
  // ...
}

auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
  using namespace PatternMatch;
  auto *Ty = In.getType();
  // ...
    return std::nullopt;
  // ...
  auto m_Shr = [](auto &&V, auto &&S) {
  // ...
  if (Op.Frac > Width)
    return std::nullopt;
  // ...
    return std::nullopt;
  // ...
  Op.Opcode = Instruction::Mul;
  // ...
  Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
  Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;
  // ...
  return std::nullopt;
}
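// The fixed-point shape being matched is, in pseudocode,
//   trunc(((ext(X) * ext(Y)) + (1 << (Frac - 1))) >> Frac)
// i.e. a widening multiply of two Qn values followed by optional rounding
// (RoundAt == Frac - 1) and an arithmetic or logical shift back down by
// Frac bits.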
auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
  // ...
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  // ...
  if (VecTy == nullptr)
  // ...
  unsigned ElemWidth = ElemTy->getBitWidth();
  // ...
  if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
  // ...
  if (ElemWidth <= 32 && Op.Frac == 0)
  // ...
  auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
  auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);
  // ...
                                    InstSimplifyFolder(HVC.DL));

  auto roundUpWidth = [](unsigned Width) -> unsigned {
    // ...
    if (Width > 32 && Width % 32 != 0) {
      // ...
    }
    // ...
  };

  BitsX = roundUpWidth(BitsX);
  BitsY = roundUpWidth(BitsY);
  // ...
  unsigned Width = std::max(BitsX, BitsY);

  auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
  if (Width < ElemWidth) {
    // ...
  } else if (Width > ElemWidth) {
    // ...
  }

  assert(X->getType() == Y->getType() && X->getType() == ResizeTy);

  unsigned VecLen = HVC.length(ResizeTy);
  unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);
  // ...
  ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);

  for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
    ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
    ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
    Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
    // ...
  }
  // ...
}
inline bool HvxIdioms::matchScatter(Instruction &In) const {
  // ...
  return (II->getIntrinsicID() == Intrinsic::masked_scatter);
}

inline bool HvxIdioms::matchGather(Instruction &In) const {
  // ...
  return (II->getIntrinsicID() == Intrinsic::masked_gather);
}

// ...
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  // ...
  assert(Ptr && "Unable to extract pointer");
  // ...
    if (II->getIntrinsicID() == Intrinsic::masked_store)
      return II->getOperand(1);
  // ...
                       HvxIdioms::DstQualifier &Qual) {
  // ...
    Qual = HvxIdioms::LdSt;
  // ...
    if (II->getIntrinsicID() == Intrinsic::masked_gather) {
      // ...
      Qual = HvxIdioms::LLVM_Gather;
    } else if (II->getIntrinsicID() == Intrinsic::masked_scatter) {
      // ...
      Qual = HvxIdioms::LLVM_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::masked_store) {
      // ...
      Qual = HvxIdioms::LdSt;
    } else if (II->getIntrinsicID() ==
               Intrinsic::hexagon_V6_vgather_vscattermh) {
      // ...
      Qual = HvxIdioms::HEX_Gather_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) {
      // ...
      Qual = HvxIdioms::HEX_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) {
      // ...
      Qual = HvxIdioms::HEX_Gather;
    }
  // ...
    Qual = HvxIdioms::Call;
  // ...
    Qual = HvxIdioms::Arithmetic;
  // ...

// ...
  for (auto &U : In->uses()) {
    // ...
      Users.push_back(Destination);
  }
  // ...
  assert(In && "Bad instruction");
  // ...
         "Not a gather Intrinsic");
  // ...
  if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
  // ...
  auto *Src = IE->getOperand(1);
  // ...
  LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");
  // ...
  if (II->getIntrinsicID() == Intrinsic::masked_load)
    return II->getType();
  if (II->getIntrinsicID() == Intrinsic::masked_store)
    return II->getOperand(0)->getType();
  // ...
  return In->getType();
// ...
  if (II->getIntrinsicID() == Intrinsic::masked_load)
  // ...
  if (II->getIntrinsicID() == Intrinsic::masked_gather)
  // ...
    return cstDataVector;
  // ...
  return GEPIndex->getOperand(0);
// ...
  LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");
// ...
  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false);
  std::vector<unsigned> shuffleMask;
  for (unsigned i = 0; i < 64; ++i)
    shuffleMask.push_back(i);
  // ...
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32");
// ...
  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false);
  std::vector<unsigned> shuffleMask;
  for (unsigned i = 0; i < 128; ++i)
    shuffleMask.push_back(i);
  // ...
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32");
// ...
                     unsigned int pattern) {
  std::vector<unsigned int> byteMask;
  for (unsigned i = 0; i < 32; ++i)
    byteMask.push_back(pattern);
  // ...
  return Builder.CreateIntrinsic(
      // ...
      {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},
      // ...
Value *HvxIdioms::processVScatter(Instruction &In) const {
  // ...
  assert(InpTy && "Cannot handle non-vector type for llvm.scatter/gather");
  unsigned InpSize = HVC.getSizeOf(InpTy);
  auto *F = In.getFunction();
  LLVMContext &Ctx = F->getContext();
  // ...
  assert(ElemTy && "llvm.scatter needs an integer type argument");
  // ...
  unsigned Elements = HVC.length(InpTy);
  LLVM_DEBUG({
    dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
    dbgs() << " Input type(" << *InpTy << ") elements(" << Elements
           << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
           << ElemWidth << ")\n";
  });
  // ...
                                    InstSimplifyFolder(HVC.DL));

  auto *ValueToScatter = In.getOperand(0);
  LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n");
  // ...
                    << ") for vscatter\n");
  // ...
  Value *CastIndex = nullptr;
  if (cstDataVector) {
    // ...
    AllocaInst *IndexesAlloca =
        Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false));
    [[maybe_unused]] auto *StoreIndexes =
        Builder.CreateStore(cstDataVector, IndexesAlloca);
    LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
    // ...
                                     IndexesAlloca, "reload_index");
    // ...
    CastIndex = Indexes;
  }
  // ...
  if (ElemWidth == 1) {
    // ...
    Type *NT = HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false);
    // ...
        HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/true), V6_vunpack, CastIndexes,
        nullptr);
    LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n");
    // ...
    [[maybe_unused]] Value *IndexHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
    [[maybe_unused]] Value *IndexLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
    // ...
        HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/true), V6_vunpack, CastSrc,
        nullptr);
    LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter
    // ...
    [[maybe_unused]] Value *UVSHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
    [[maybe_unused]] Value *UVSLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);
    // ...
    auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
    // ...
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
    // ...
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
    // ...
  } else if (ElemWidth == 2) {
    // ...
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
    // ...
  } else if (ElemWidth == 4) {
    // ...
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
    // ...
  }
  // ...
}
Value *HvxIdioms::processVGather(Instruction &In) const {
  [[maybe_unused]] auto *InpTy =
  // ...
  assert(InpTy && "Cannot handle non-vector type for llvm.gather");
  [[maybe_unused]] auto *ElemTy =
  // ...
  assert(ElemTy && "llvm.gather needs a vector of ptr argument");
  auto *F = In.getFunction();
  LLVMContext &Ctx = F->getContext();
  LLVM_DEBUG(
  // ...
                    << *In.getParent() << "\n");
  // ...
                    << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
                    << ") type(" << *ElemTy << ") Access alignment("
                    << *In.getOperand(1) << ") AddressSpace("
                    << ElemTy->getAddressSpace() << ")\n");
  // ...
         "llvm.gather needs vector for mask");
  // ...
                                    InstSimplifyFolder(HVC.DL));
  // ...
  HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;
  // ...
  LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual
  // ...
    LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");
  // ...
  assert(DstType && "Cannot handle non-vector dst type for llvm.gather");
  // ...
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false);
  if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
    // ...
    unsigned OutputSize = HVC.getSizeOf(DstType);
    LLVM_DEBUG(
    // ...
                      << " Address space ("
    // ...
                      << " Result type : " << *DstType
                      << "\n Size in bytes : " << OutputSize
                      << " element type(" << *DstElemTy
                      << ")\n ElemWidth : " << ElemWidth << " bytes\n");
    // ...
    assert(IndexType && "Cannot handle non-vector index type for llvm.gather");
    unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());
    // ...
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
    // ...
    if (ElemWidth == 1) {
      // ...
      Value *CastIndexes =
      // ...
      auto *UnpackedIndexes =
      // ...
          V6_vunpack, CastIndexes, nullptr);
      // ...
      [[maybe_unused]] Value *IndexHi =
          HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
      [[maybe_unused]] Value *IndexLo =
          HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
      // ...
      auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
      // ...
      auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);
      // ...
          Type::getVoidTy(Ctx), V6_vgather,
          {Ptr, QByteMask, CastedPtr,
      // ...
          HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false), Ptr,
          "temp_result_hi");
      LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");
      // ...
          Type::getVoidTy(Ctx), V6_vgather,
          {Ptr, QByteMask, CastedPtr,
      // ...
          HVC.getHvxTy(HVC.getIntTy(32), /*Pair=*/false), Ptr,
          "temp_result_lo");
      LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");
      // ...
          NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);
      // ...
      [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr);
      // ...
    } else if (ElemWidth == 2) {
      // ...
      if (IndexWidth == 2) {
        // ...
        Value *AdjustedIndex = HVC.createHvxIntrinsic(
            Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
        LLVM_DEBUG(dbgs()
                   << " Shifted half index: " << *AdjustedIndex << ")\n");
        // ...
            Type::getVoidTy(Ctx), V6_vgather,
        // ...
        LLVM_DEBUG({
          for (auto &U : Dst->uses()) {
            // ...
            dbgs() << " dst used by: " << *UI << "\n";
          }
          for (auto &U : In.uses()) {
            // ...
            dbgs() << " In used by : " << *UI << "\n";
          }
        });
        // ...
            HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/false), Ptr,
            "temp_result");
        LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
        In.replaceAllUsesWith(LoadedResult);
      } else {
        dbgs() << " Unhandled index type for vgather\n";
      }
      // ...
    } else if (ElemWidth == 4) {
      if (IndexWidth == 4) {
        // ...
        Value *AdjustedIndex = HVC.createHvxIntrinsic(
            Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});
        LLVM_DEBUG(dbgs()
                   << " Shifted word index: " << *AdjustedIndex << ")\n");
        // ...
            Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,
        // ...
      }
    }
    // ...
    Dst->eraseFromParent();
  } else if (Qual == HvxIdioms::LLVM_Scatter) {
    // ...
    assert(DstInpTy && "Cannot handle non-vector type for llvm.scatter");
    [[maybe_unused]] unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
    [[maybe_unused]] unsigned DstElements = HVC.length(DstInpTy);
    [[maybe_unused]] auto *DstElemTy =
    // ...
    assert(DstElemTy && "llvm.scatter needs a vector of ptr argument");
    LLVM_DEBUG(dbgs() << " Gather feeds into scatter\n Values to scatter : "
                      << *Dst->getOperand(0) << "\n");
    LLVM_DEBUG(
    // ...
                      << DstElements << ") VecLen(" << DstInpSize << ") type("
                      << *DstElemTy << ") Access alignment("
                      << *Dst->getOperand(2) << ")\n");
    // ...
        Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
    // ...
    Value *AdjustedIndex = HVC.createHvxIntrinsic(
        Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
    LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n");
    // ...
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
    // ...
  } else if (Qual == HvxIdioms::HEX_Gather_Scatter) {
    // ...
    if (cstDataVector) {
      // ...
      [[maybe_unused]] auto *StoreIndexes =
          Builder.CreateStore(cstDataVector, IndexesAlloca);
      LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
      // ...
      LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");
      // ...
          IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
      // ...
          Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
          {ResultAlloca, CastedSrc,
      // ...
          HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/false), ResultAlloca,
          "temp_result");
      LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
      // ...
      In.replaceAllUsesWith(LoadedResult);
    } else {
      // ...
          Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
      // ...
          Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,
      // ...
    }
  } else if (Qual == HvxIdioms::HEX_Scatter) {
    // ...
        IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
    // ...
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
    // ...
        HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/false), ResultAlloca,
        "temp_result");
    LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
    In.replaceAllUsesWith(LoadedResult);
  } else if (Qual == HvxIdioms::HEX_Gather) {
    // ...
    if (cstDataVector) {
      // ...
      [[maybe_unused]] auto *StoreIndexes =
          Builder.CreateStore(cstDataVector, IndexesAlloca);
      LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
      // ...
                        << "\n AddressSpace: "
      // ...
          IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
      // ...
          Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
          {ResultAlloca, CastedSrc,
      // ...
          HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/false), ResultAlloca,
          "temp_result");
      LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
      // ...
      In.replaceAllUsesWith(LoadedResult);
    }
    // ...
  } else if (Qual == HvxIdioms::LLVM_Gather) {
    // ...
    errs() << " Unimplemented vgather-to-vgather sequence\n";
  }
  // ...
}
auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
                                     const FxpOp &Op) const -> Value * {
  assert(Op.X.Val->getType() == Op.Y.Val->getType());
  // ...
  unsigned Width = InpTy->getScalarSizeInBits();
  // ...
  if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {
    // ...
    Value *QMul = nullptr;
    if (Width == 16) {
      QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);
    } else if (Width == 32) {
      QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);
    }
    if (QMul != nullptr)
    // ...
  }

  assert(Width < 32 || Width % 32 == 0);
  // ...
  assert(Op.Frac != 0 && "Unshifted mul should have been skipped");
  if (Op.Frac == 16) {
    // ...
    if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))
    // ...
  }
  // ...
  Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
  // ...
    Value *RoundVal = ConstantInt::get(Prod32->getType(), 1 << *Op.RoundAt);
    Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");
  // ...
      ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")
      : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");
  return Builder.CreateTrunc(Shifted, InpTy, "trn");
  // ...

  auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, 32);
  auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, 32);
  auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);
  // ...
  if (Op.RoundAt.has_value()) {
    // ...
    RoundV[*Op.RoundAt / 32] =
        ConstantInt::get(HvxWordTy, 1 << (*Op.RoundAt % 32));
    WordP = createAddLong(Builder, WordP, RoundV);
  }
  // ...
  unsigned SkipWords = Op.Frac / 32;
  Constant *ShiftAmt = ConstantInt::get(HvxWordTy, Op.Frac % 32);

  for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
    int Src = Dst + SkipWords;
    // ...
    if (Src + 1 < End) {
      // ...
    }
  }
  // ...
  WordP.resize(WordP.size() - SkipWords);

  return HVC.joinVectorElements(Builder, WordP, Op.ResTy);
}
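// Worked example of the 16-bit fallback path above (illustrative): for a
// fixed-point multiply with Frac = 16 and RoundAt = 15, the product is
// formed in 32 bits, 1 << 15 is added for rounding, the result is shifted
// right by the shift amount (arithmetic for signed operands, logical
// otherwise), and the low 16 bits are kept via trunc -- exactly the
// Add/AShr-or-LShr/Trunc sequence shown.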
auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));
  // ...
  auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
  return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),
  // ...
}

auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
                             bool Rounding) const -> Value * {
  Type *InpTy = X.Val->getType();
  assert(InpTy == Y.Val->getType());
  // ...
      HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});
  return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
                                {V1, X.Val, Y.Val});
}
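// A note on the Q31 lowering (summary, not normative): no single HVX
// instruction produces the high word of a 32x32 fractional product, so the
// result is assembled from two halfword multiplies -- vmpyewuh forms the
// product with the even (unsigned) halfwords, and vmpyo_acc folds the
// odd-halfword product on top of it.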
auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
                               Value *CarryIn) const
    -> std::pair<Value *, Value *> {
  assert(X->getType() == Y->getType());
  // ...
    if (CarryIn == nullptr)
    // ...
      Args.push_back(CarryIn);
    // ...
    Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
    // ...
    return {Result, CarryOut};
  // ...
  if (CarryIn != nullptr) {
    unsigned Width = VecTy->getScalarSizeInBits();
    // ...
    for (unsigned i = 0, e = 32 / Width; i != e; ++i)
      Mask = (Mask << Width) | 1;
    // ...
        HVC.createHvxIntrinsic(Builder, V6_vandqrt, nullptr,
                               {CarryIn, HVC.getConstInt(Mask)});
    Result1 = Builder.CreateAdd(X, ValueIn, "add");
  }
  // ...
  return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};
}
auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
  // ...
  std::tie(X, Y) = canonSgn(X, Y);
  // ...
      HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
  // ...
  return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));
}

auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
  // ...
  Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/false);
  // ...
    return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,
  // ...
  Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/true);
  // ...
  unsigned Len = HVC.length(HvxP16Ty) / 2;

  SmallVector<int, 128> PickOdd(Len);
  for (int i = 0; i != static_cast<int>(Len); ++i)
    PickOdd[i] = 2 * i + 1;
  // ...
      HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");
}

auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
    -> std::pair<Value *, Value *> {
  assert(X.Val->getType() == Y.Val->getType());
  assert(X.Val->getType() == HvxI32Ty);
  // ...
  std::tie(X, Y) = canonSgn(X, Y);
  // ...
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
  // ...
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
  // ...
    V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;
  // ...
  Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
                                        {X.Val, Y.Val}, {HvxI32Ty});
  // ...
}
  // ... (createAddLong)
  assert(WordX.size() == WordY.size());
  unsigned Idx = 0, Length = WordX.size();
  // ...
    if (HVC.isZero(WordX[Idx]))
      Sum[Idx] = WordY[Idx];
    else if (HVC.isZero(WordY[Idx]))
      Sum[Idx] = WordX[Idx];
  // ...
  Value *Carry = nullptr;
  for (; Idx != Length; ++Idx) {
    std::tie(Sum[Idx], Carry) =
        createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);
  }
  // ...

  // ... (createMulLong)
  for (int i = 0, e = WordX.size(); i != e; ++i) {
    for (int j = 0, f = WordY.size(); j != f; ++j) {
      // ...
      Signedness SX = (i + 1 == e) ? SgnX : Unsigned;
      // ...
      auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});
      Products[i + j + 0].push_back(Lo);
      Products[i + j + 1].push_back(Hi);
    }
  }
  // ...
  for (int i = 0, e = Products.size(); i != e; ++i) {
    while (Products[i].size() > 1) {
      Value *Carry = nullptr;
      for (int j = i; j != e; ++j) {
        auto &ProdJ = Products[j];
        auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
                                              pop_back_or_zero(ProdJ), Carry);
        ProdJ.insert(ProdJ.begin(), Sum);
        // ...
      }
    }
  }
  // ...
  for (auto &P : Products) {
    assert(P.size() == 1 && "Should have been added together");
    // ...
  }
  // ...
auto HvxIdioms::run() -> bool {
  // ...
  for (BasicBlock &B : HVC.F) {
    for (auto It = B.rbegin(); It != B.rend(); ++It) {
      if (auto Fxm = matchFxpMul(*It)) {
        Value *New = processFxpMul(*It, *Fxm);
        // ...
        It->replaceAllUsesWith(New);
        // ...
        It = StartOver ? B.rbegin()
        // ...
      } else if (matchGather(*It)) {
        // ...
        It->eraseFromParent();
        // ...
      } else if (matchScatter(*It)) {
        // ...
        It->eraseFromParent();
        // ...
      }
    }
  }
  // ...
}
auto HexagonVectorCombine::run() -> bool {
  // ...
    dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();
  // ...
  if (HST.useHVXOps()) {
    // ...
    Changed |= AlignVectors(*this).run();
    // ...
    Changed |= HvxIdioms(*this).run();
  }
  // ...
    dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")
           << " after HexagonVectorCombine\n"
  // ...
}
auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {
  // ...
}

auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
  // ...
  IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
  // ...
  return VectorType::get(ByteTy, ElemCount, /*Scalable=*/false);
}

auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
  // ...
  IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
  // ...
  return VectorType::get(BoolTy, ElemCount, /*Scalable=*/false);
}

auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const
  // ...

auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
  // ...
    return C->isZeroValue();
  // ...
}

auto HexagonVectorCombine::getIntValue(const Value *Val) const
    -> std::optional<APInt> {
  // ...
    return CI->getValue();
  return std::nullopt;
}

auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
  // ...
}

auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {
  // ...
}

auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {
  // ...
}

auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
  // ...
         "Invalid HVX element type");
  unsigned HwLen = HST.getVectorLength();
  // ...
  return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,
  // ...
}
auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
  // ...
  return getSizeOf(Val->getType(), Kind);
}

auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
  // ...
  auto *NcTy = const_cast<Type *>(Ty);
  // ...
    return DL.getTypeStoreSize(NcTy).getFixedValue();
  // ...
    return DL.getTypeAllocSize(NcTy).getFixedValue();
  // ...
}

auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
  // ...
  if (HST.isTypeForHVX(Ty))
    return HST.getVectorLength();
  return DL.getABITypeAlign(Ty).value();
}

auto HexagonVectorCombine::length(Value *Val) const -> size_t {
  return length(Val->getType());
}

auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
  // ...
  assert(VecTy && "Must be a vector type");
  return VecTy->getElementCount().getFixedValue();
}

auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
  // ...
  SimplifyQuery Q(DL, &TLI, &DT, &AC, In);
  // ...
}
auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,
  // ...
                                   int Where) const -> Value * {
  assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
  int SrcLen = getSizeOf(Src);
  int DstLen = getSizeOf(Dst);
  // ...
  Value *P2Src = vresize(Builder, Src, P2Len, Poison);
  Value *P2Dst = vresize(Builder, Dst, P2Len, Poison);
  // ...
  for (int i = 0; i != P2Len; ++i) {
    // ...
        (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
  }
  // ...
  return vresize(Builder, P2Insert, DstLen, Poison);
}
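// insertb overwrites bytes [Where, Where+Length) of Dst with bytes
// [Start, Start+Length) of Src via a single shufflevector over the two
// vectors resized to a common power-of-two length: mask element i selects
// P2Len + Start + (i - Where) (a Src byte) inside the window, and i (the
// original Dst byte) outside it.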
auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,
  // ...
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  // ...
  int VecLen = getSizeOf(Hi);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
  // ...
  if (HST.isTypeForHVX(Hi->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
                              Hi->getType(), {Hi, Lo, Amt});
  }
  // ...
      Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
  // ...
  return vralignb(Builder, Lo, Hi, Sub);
  // ...
}

auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,
  // ...
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  // ...
  int VecLen = getSizeOf(Lo);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);
  // ...
  if (HST.isTypeForHVX(Lo->getType())) {
    assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
           "Expecting an exact HVX type");
    return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
                              Lo->getType(), {Hi, Lo, Amt});
  }
  // ...
      Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
  // ...
  Type *Int64Ty = Type::getInt64Ty(F.getContext());
  // ...
}
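// Alignment helpers, viewed over the 2*VecLen-byte concatenation Hi:Lo
// (illustrative): vralignb(Lo, Hi, Amt) returns bytes [Amt, Amt+VecLen) of
// the concatenation -- a funnel shift right by Amt bytes -- while
// vlalignb(Lo, Hi, Amt) returns bytes [VecLen-Amt, 2*VecLen-Amt), which is
// why its constant-amount case maps to getElementRange with VecLen - Amt.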
auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
  // ...
  std::vector<Value *> Work[2];
  int ThisW = 0, OtherW = 1;

  Work[ThisW].assign(Vecs.begin(), Vecs.end());
  while (Work[ThisW].size() > 1) {
    // ...
    SMask.resize(length(Ty) * 2);
    std::iota(SMask.begin(), SMask.end(), 0);
    // ...
    Work[OtherW].clear();
    if (Work[ThisW].size() % 2 != 0)
    // ...
    for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
      // ...
          Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");
      Work[OtherW].push_back(Joined);
    }
    // ...
  }
  // ...
  SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
  std::iota(SMask.begin(), SMask.end(), 0);
  // ...
}

auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,
  // ...
  assert(ValTy->getElementType() == Pad->getType());

  int CurSize = length(ValTy);
  if (CurSize == NewSize)
  // ...
  if (CurSize > NewSize)
    return getElementRange(Builder, Val, Val, 0, NewSize);
  // ...
  SmallVector<int, 128> SMask(NewSize);
  std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
  std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
  // ...
}
auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,
  // ...
  Type *FromSTy = FromTy->getScalarType();
  Type *ToSTy = ToTy->getScalarType();
  if (FromSTy == ToSTy)
  // ...
  int FromSize = getSizeOf(FromSTy);
  int ToSize = getSizeOf(ToSTy);
  assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
  // ...
  int FromCount = length(MaskTy);
  int ToCount = (FromCount * FromSize) / ToSize;
  assert((FromCount * FromSize) % ToSize == 0);

  auto *FromITy = getIntTy(FromSize * 8);
  auto *ToITy = getIntTy(ToSize * 8);
  // ...
      Mask, VectorType::get(FromITy, FromCount, /*Scalable=*/false), "sxt");
  // ...
      Ext, VectorType::get(ToITy, ToCount, /*Scalable=*/false), "cst");
  // ...
      Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable=*/false), "trn");
}
auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder, Value *Val) const
  // ...
  if (ScalarTy == getBoolTy())
  // ...
  Value *Bytes = vbytes(Builder, Val);
  // ...
    return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)), "trn");
  // ...
  return Builder.CreateTrunc(Bytes, getBoolTy(), "trn");
}

auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const
  // ...
  if (ScalarTy == getByteTy())
  // ...
  if (ScalarTy != getBoolTy())
    return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)), "cst");
  // ...
    return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy), "sxt");
  return Builder.CreateSExt(Val, getByteTy(), "sxt");
}
auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,
                                     unsigned Start, unsigned Length) const
  // ...
  return getElementRange(Builder, Val, Val, Start, Length);
}

auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const
  // ...
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, 0, Len / 2);
}

auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const
  // ...
  size_t Len = length(Val);
  assert(Len % 2 == 0 && "Length should be even");
  return subvector(Builder, Val, Len / 2, Len / 2);
}

auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,
  // ...
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  SmallVector<int, 128> Mask(2 * Len);
  // ...
  for (int i = 0; i != Len; ++i) {
    // ...
  }
  // ...
}

auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
  // ...
  assert(Val0->getType() == Val1->getType());
  int Len = length(Val0);
  SmallVector<int, 128> Mask(2 * Len);
  // ...
  for (int i = 0; i != Len; ++i) {
    Mask[2 * i + 0] = i;
    // ...
  }
  // ...
}
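// vdeal and vshuff are inverse element permutations. Example with Len = 4
// and inputs a0..a3 / b0..b3 (chosen for exposition): vshuff interleaves
// them into a0 b0 a1 b1 a2 b2 a3 b3 -- the visible Mask[2*i] = i entries
// select Val0's elements for the even positions -- while vdeal splits an
// interleaved pair back into the even-index and odd-index subsequences.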
auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
  // ...
  auto getCast = [&](IRBuilderBase &Builder, Value *Val,
  // ...
    Type *SrcTy = Val->getType();
    if (SrcTy == DestTy)
    // ...
    assert(HST.isTypeForHVX(SrcTy, /*IncludeBool=*/true));
    // ...
    Type *BoolTy = Type::getInt1Ty(F.getContext());
    // ...
    unsigned HwLen = HST.getVectorLength();
    Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
                                   : Intrinsic::hexagon_V6_pred_typecast_128B;
    // ...
  };

  for (int i = 0, e = Args.size(); i != e; ++i) {
    // ...
    Type *T = IntrTy->getParamType(i);
    if (A->getType() != T) {
      // ...
    }
  }
  // ...
  StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";
  CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);
  // ...
  if (RetTy == nullptr || CallTy == RetTy)
  // ...
  assert(HST.isTypeForHVX(CallTy, /*IncludeBool=*/true));
  return getCast(Builder, Call, RetTy);
}
auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
  // ...
                                               unsigned ToWidth) const
  // ...
  assert(VecTy->getElementType()->isIntegerTy());
  unsigned FromWidth = VecTy->getScalarSizeInBits();
  // ...
  assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
  unsigned NumResults = FromWidth / ToWidth;
  // ...
  unsigned Length = length(VecTy);
  // ...
  auto splitInHalf = [&](unsigned Begin, unsigned End,
                         auto splitFunc) -> void {
    // ...
    if (Begin + 1 == End)
    // ...
    auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);
    // ...
    Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));

    unsigned Half = (Begin + End) / 2;
    Results[Begin] = sublo(Builder, Res);
    Results[Half] = subhi(Builder, Res);

    splitFunc(Begin, Half, splitFunc);
    splitFunc(Half, End, splitFunc);
  };

  splitInHalf(0, NumResults, splitInHalf);
  // ...
}

auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
  // ...
                                              VectorType *ToType) const
  // ...
  assert(ToType->getElementType()->isIntegerTy());
  // ...
  unsigned ToWidth = ToType->getScalarSizeInBits();
  unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
  assert(Width <= ToWidth);
  // ...
  unsigned Length = length(Inputs.front()->getType());

  unsigned NeedInputs = ToWidth / Width;
  if (Inputs.size() != NeedInputs) {
    // ...
        Last, ConstantInt::get(Last->getType(), Width - 1), "asr");
    Inputs.resize(NeedInputs, Sign);
  }

  while (Inputs.size() > 1) {
    // ...
    for (int i = 0, e = Inputs.size(); i < e; i += 2) {
      Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
      // ...
    }
    Inputs.resize(Inputs.size() / 2);
  }

  assert(Inputs.front()->getType() == ToType);
  return Inputs.front();
}
auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
                                                      Value *Ptr1) const
    -> std::optional<int> {
  // Try SCEV first.
  const SCEV *Scev0 = SE.getSCEV(Ptr0);
  const SCEV *Scev1 = SE.getSCEV(Ptr1);
  const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
  if (const auto *Const = dyn_cast<SCEVConstant>(ScevDiff)) {
    APInt V = Const->getAPInt();
    if (V.isSignedIntN(8 * sizeof(int)))
      return static_cast<int>(V.getSExtValue());
  }

  // A builder that erases every instruction it created when it goes out
  // of scope: the instructions below exist only to be constant-folded.
  struct Builder : IRBuilder<> {
    Builder(BasicBlock *B) : IRBuilder<>(B->getTerminator()) {}
    ~Builder() {
      for (Instruction *I : llvm::reverse(ToErase))
        I->eraseFromParent();
    }
    SmallVector<Instruction *, 8> ToErase;
  };

#define CallBuilder(B, F)                                                      \
  [&](auto &B_) {                                                              \
    Value *V = B_.F;                                                           \
    if (auto *I = dyn_cast<Instruction>(V))                                    \
      B_.ToErase.push_back(I);                                                 \
    return V;                                                                  \
  }(B)

  auto Simplify = [this](Value *V) {
    if (Value *S = simplify(V))
      return S;
    return V;
  };

  auto StripBitCast = [](Value *V) {
    while (auto *C = dyn_cast<BitCastInst>(V))
      V = C->getOperand(0);
    return V;
  };

  Ptr0 = StripBitCast(Ptr0);
  Ptr1 = StripBitCast(Ptr1);
  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
    return std::nullopt;

  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
    return std::nullopt;
  if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
    return std::nullopt;

  Builder B(Gep0->getParent());
  int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);

  // FIXME: for now only check GEPs with a single index.
  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
    return std::nullopt;

  Value *Idx0 = Gep0->getOperand(1);
  Value *Idx1 = Gep1->getOperand(1);

  // First, try to simplify the subtraction directly.
  if (auto *Diff = dyn_cast<ConstantInt>(
          Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
    return Diff->getSExtValue() * Scale;

  KnownBits Known0 = getKnownBits(Idx0, Gep0);
  KnownBits Known1 = getKnownBits(Idx1, Gep1);
  APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
  if (Unknown.isAllOnes())
    return std::nullopt;

  // Split each index into its unknown and known bits and subtract the
  // parts separately.
  Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
  Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
  Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
  Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
  int Diff0 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubU)) {
    Diff0 = C->getSExtValue();
  } else {
    return std::nullopt;
  }

  Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
  Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
  Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
  Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
  int Diff1 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubK)) {
    Diff1 = C->getSExtValue();
  } else {
    return std::nullopt;
  }

  return (Diff0 + Diff1) * Scale;

#undef CallBuilder
}
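// For illustration of the known-bits fallback: if Idx1 = X & ~7 and
// Idx0 = (X & ~7) | 4, the direct subtraction may not fold, but masking
// with the unknown bits cancels the identical unknown parts (Diff0 = 0),
// and masking with the known bits folds to Diff1 = 4 - 0 = 4, giving a
// difference of 4 * Scale. The split is sound because the known and
// unknown masks partition the bits of each index.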
auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
                                                 const Instruction *CtxI) const
    -> unsigned {
  return ComputeMaxSignificantBits(V, DL, &AC, CtxI, &DT);
}
auto HexagonVectorCombine::getKnownBits(const Value *V,
                                        const Instruction *CtxI) const
    -> KnownBits {
  return computeKnownBits(V, DL, &AC, CtxI, &DT);
}
auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {
  if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||
      In.isFenceLike() || In.mayReadOrWriteMemory()) {
    return false;
  }
  if (isa<CallBase>(In) || isa<AllocaInst>(In))
    return false;
  return true;
}
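// Note: cloning is only considered for pure computations; an instruction
// that touches memory or has side effects would have its effect duplicated
// by a clone, so it is rejected outright.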
template <typename T>
auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
                                                  BasicBlock::const_iterator To,
                                                  const T &IgnoreInsts) const
    -> bool {
  auto getLocOrNone =
      [this](const Instruction &I) -> std::optional<MemoryLocation> {
    if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::masked_load:
        return MemoryLocation::getForArgument(II, 0, TLI);
      case Intrinsic::masked_store:
        return MemoryLocation::getForArgument(II, 1, TLI);
      }
    }
    return MemoryLocation::getOrNone(&I);
  };

  // The source and the destination must be in the same basic block.
  const BasicBlock &Block = *In.getParent();
  assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
  // No PHIs.
  if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
    return false;

  if (!mayHaveNonDefUseDependency(In))
    return true;
  bool MayWrite = In.mayWriteToMemory();
  auto MaybeLoc = getLocOrNone(In);

  auto From = In.getIterator();
  if (From == To)
    return true;
  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
  auto Range =
      MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
  for (auto It = Range.first; It != Range.second; ++It) {
    const Instruction &I = *It;
    if (llvm::is_contained(IgnoreInsts, &I))
      continue;
    // The assume intrinsic can be ignored.
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::assume)
        continue;
    }
    if (I.mayThrow())
      return false;
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (!CB->hasFnAttr(Attribute::WillReturn))
        return false;
      if (!CB->hasFnAttr(Attribute::NoSync))
        return false;
    }
    if (I.mayReadOrWriteMemory()) {
      auto MaybeLocI = getLocOrNone(I);
      if (MayWrite || I.mayWriteToMemory()) {
        if (!MaybeLoc || !MaybeLocI)
          return false;
        if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
          return false;
      }
    }
  }
  return true;
}
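// For illustration: when moving In up to To, the scanned range is [To, In);
// when moving down it is (In, To). A load moved past a store survives the
// scan only if AA proves the two MemoryLocations NoAlias; masked loads and
// stores get their locations from the pointer argument via
// MemoryLocation::getForArgument above.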
auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return VecTy->getElementType() == getByteTy();
  return false;
}
auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,
                                           Value *Hi, int Start,
                                           int Length) const -> Value * {
  assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));
  SmallVector<int, 128> SMask(Length);
  std::iota(SMask.begin(), SMask.end(), Start);
  return Builder.CreateShuffleVector(Lo, Hi, SMask, "shf");
}
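// For illustration: Start = 3, Length = 4 yields the mask {3,4,5,6},
// selecting four consecutive elements from the concatenation Lo:Hi
// starting at index 3.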
namespace {
class HexagonVectorCombineLegacy : public FunctionPass {
public:
  static char ID;

  HexagonVectorCombineLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override { return "Hexagon Vector Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
    HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);
    return HVC.run();
  }
};
} // namespace

char HexagonVectorCombineLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
                      "Hexagon Vector Combine", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
                    "Hexagon Vector Combine", false, false)

FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
  return new HexagonVectorCombineLegacy();
}
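// Note: the INITIALIZE_PASS_* block above registers the pass with the legacy
// pass manager under DEBUG_TYPE and records its analysis dependencies
// (alias analysis, assumption cache, dominator tree, scalar evolution, TLI,
// and TargetPassConfig) so they are scheduled before this pass runs.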