28#include "llvm/IR/IntrinsicsAMDGPU.h"
36#define DEBUG_TYPE "AMDGPUtti"
40struct AMDGPUImageDMaskIntrinsic {
44#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
45#include "AMDGPUGenSearchableTables.inc"
56 "nans handled separately");
73 Type *VTy = V.getType();
82 APFloat FloatValue(ConstFloat->getValueAPF());
83 bool LosesInfo =
true;
92 APInt IntValue(ConstInt->getValue());
111 Type *VTy = V.getType();
137 Func(Args, OverloadTys);
153 bool RemoveOldIntr = &OldIntr != &InstToReplace;
162static std::optional<Instruction *>
167 if (
const auto *LZMappingInfo =
169 if (
auto *ConstantLod =
171 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
176 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
177 Args.erase(Args.begin() + ImageDimIntr->LodIndex);
184 if (
const auto *MIPMappingInfo =
186 if (
auto *ConstantMip =
188 if (ConstantMip->isZero()) {
193 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
194 Args.erase(Args.begin() + ImageDimIntr->MipIndex);
201 if (
const auto *BiasMappingInfo =
203 if (
auto *ConstantBias =
205 if (ConstantBias->isZero()) {
210 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
211 Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
212 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
219 if (
const auto *OffsetMappingInfo =
221 if (
auto *ConstantOffset =
223 if (ConstantOffset->isZero()) {
226 OffsetMappingInfo->NoOffset, ImageDimIntr->
Dim);
228 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
229 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
236 if (ST->hasD16Images()) {
246 if (
II.hasOneUse()) {
249 if (
User->getOpcode() == Instruction::FPTrunc &&
253 [&](
auto &Args,
auto &ArgTys) {
256 ArgTys[0] = User->getType();
265 bool AllHalfExtracts =
true;
267 for (
User *U :
II.users()) {
269 if (!Ext || !Ext->hasOneUse()) {
270 AllHalfExtracts =
false;
275 if (!Tr || !Tr->getType()->isHalfTy()) {
276 AllHalfExtracts =
false;
283 if (!ExtractTruncPairs.
empty() && AllHalfExtracts) {
294 OverloadTys[0] = HalfVecTy;
297 M, ImageDimIntr->
Intr, OverloadTys);
299 II.mutateType(HalfVecTy);
300 II.setCalledFunction(HalfDecl);
303 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
304 Value *Idx = Ext->getIndexOperand();
306 Builder.SetInsertPoint(Tr);
308 Value *HalfExtract = Builder.CreateExtractElement(&
II, Idx);
311 Tr->replaceAllUsesWith(HalfExtract);
314 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
325 if (!ST->hasA16() && !ST->hasG16())
332 bool FloatCoord =
false;
334 bool OnlyDerivatives =
false;
337 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
338 Value *Coord =
II.getOperand(OperandIndex);
341 if (OperandIndex < ImageDimIntr->CoordStart ||
346 OnlyDerivatives =
true;
355 if (!OnlyDerivatives && !ST->hasA16())
356 OnlyDerivatives =
true;
359 if (!OnlyDerivatives && ImageDimIntr->
NumBiasArgs != 0) {
362 "Only image instructions with a sampler can have a bias");
364 OnlyDerivatives =
true;
367 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->
GradientStart ==
375 II,
II,
II.getIntrinsicID(), IC, [&](
auto &Args,
auto &ArgTys) {
376 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
377 if (!OnlyDerivatives) {
378 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
381 if (ImageDimIntr->NumBiasArgs != 0)
382 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
388 OperandIndex < EndIndex; OperandIndex++) {
390 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
395 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
396 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
425 Value *Src =
nullptr;
428 if (Src->getType()->isHalfTy())
445 unsigned VWidth = VTy->getNumElements();
448 for (
int i = VWidth - 1; i > 0; --i) {
470 unsigned VWidth = VTy->getNumElements();
476 SVI->getShuffleMask(ShuffleMask);
478 for (
int I = VWidth - 1;
I > 0; --
I) {
479 if (ShuffleMask.empty()) {
530 unsigned LaneArgIdx)
const {
531 unsigned MaskBits = ST->getWavefrontSizeLog2();
545 Value *LaneArg =
II.getArgOperand(LaneArgIdx);
548 if (MaskedConst != LaneArg) {
549 II.getOperandUse(LaneArgIdx).set(MaskedConst);
561 CallInst *NewCall =
B.CreateCall(&NewCallee,
Ops, OpBundles);
577 if (ST.isWave32() &&
match(V, W32Pred))
579 if (ST.isWave64() &&
match(V, W64Pred))
588 const auto IID =
II.getIntrinsicID();
589 assert(IID == Intrinsic::amdgcn_readlane ||
590 IID == Intrinsic::amdgcn_readfirstlane ||
591 IID == Intrinsic::amdgcn_permlane64);
601 const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
605 Value *LaneID =
nullptr;
607 LaneID =
II.getOperand(1);
621 const auto DoIt = [&](
unsigned OpIdx,
625 Ops.push_back(LaneID);
641 return DoIt(0,
II.getCalledFunction());
645 Type *SrcTy = Src->getType();
651 return DoIt(0, Remangled);
659 return DoIt(1,
II.getCalledFunction());
661 return DoIt(0,
II.getCalledFunction());
672 unsigned Depth = 0) {
682 return CI->getZExtValue();
691 std::optional<unsigned>
LHS =
695 std::optional<unsigned>
RHS =
704 return CI ? std::optional<unsigned>(CI->getZExtValue()) : std::nullopt;
712 unsigned WaveSize = ST.getWavefrontSize();
714 for (
unsigned Lane :
seq(WaveSize)) {
716 if (!Val || *Val >= WaveSize)
725template <
unsigned Period>
727 static_assert(
isPowerOf2_32(Period),
"Period must be a power of two");
728 for (
unsigned I = Period,
E = Ids.
size();
I <
E; ++
I)
729 if (Ids[
I] != Ids[
I % Period] + (
I & ~(Period - 1)))
737 for (
unsigned I = 0;
I <
N; ++
I)
753 return Ids[3] << 6 | Ids[2] << 4 | Ids[1] << 2 | Ids[0];
760 for (
unsigned J = 0; J <
N; ++J)
761 if (Ids[J] != (
N - 1) - J)
773 for (
unsigned J = 1; J < 16; ++J)
774 if (Ids[J] != (Ids[0] + J) % 16)
792 unsigned Mask = Ids[0];
795 for (
unsigned J = 0; J < 16; ++J)
796 if (Ids[J] != (Mask ^ J))
806 unsigned Selector = 0;
807 for (
unsigned J = 0; J < 8; ++J)
808 Selector |= Ids[J] << (J * 3);
817 for (
unsigned J = 0; J < 16; ++J)
818 Sel |=
static_cast<uint64_t>(Ids[J] & 0xF) << (J * 4);
825 if (Ids.
size() != 64)
827 for (
unsigned J = 0; J < 64; ++J)
828 if (Ids[J] != (J ^ 32))
839 for (
unsigned J = 0; J < 16; ++J) {
840 if (Ids[J] < 16 || Ids[J] >= 32)
842 if (Ids[J + 16] != Ids[J] - 16)
853static std::optional<unsigned>
862 unsigned AndMask = 0, OrMask = 0, XorMask = 0;
863 for (
unsigned B = 0;
B < 5; ++
B) {
864 unsigned Bit0 = (Ids[0] >>
B) & 1;
865 unsigned Bit1 = (Ids[1u <<
B] >>
B) & 1;
868 XorMask |= Bit0 <<
B;
876 for (
unsigned I :
seq(32u)) {
877 unsigned Expected = ((
I & AndMask) | OrMask) ^ XorMask;
892static std::optional<unsigned>
903 for (
unsigned I = 0;
I < 32; ++
I)
904 if (Ids[
I] != (
I +
N) % 32)
916 return B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, {Ty},
918 B.getInt32(0xF),
B.getInt32(0xF),
B.getTrue()});
923 return B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp8, {Val->
getType()},
924 {Val,
B.getInt32(Selector)});
931 return B.CreateIntrinsic(Intrinsic::amdgcn_permlane16, {Ty},
933 B.getInt32(
Hi),
B.getFalse(),
B.getFalse()});
941 return B.CreateIntrinsic(Intrinsic::amdgcn_permlanex16, {Ty},
943 B.getInt32(
Hi),
B.getFalse(),
B.getFalse()});
951 assert(
DL.getTypeSizeInBits(OrigTy) == 32 &&
952 "ds_swizzle only supports 32-bit operands");
956 Src =
B.CreatePtrToInt(Src, I32Ty);
957 else if (OrigTy != I32Ty)
958 Src =
B.CreateBitCast(Src, I32Ty);
959 Value *Result =
B.CreateIntrinsic(Intrinsic::amdgcn_ds_swizzle, {},
962 return B.CreateIntToPtr(Result, OrigTy);
964 return B.CreateBitCast(Result, OrigTy);
970 return B.CreateIntrinsic(Intrinsic::amdgcn_permlane64, {Val->
getType()},
981 [](
const auto &
E) {
return E.value() ==
E.index(); }))
1005 if (ST.hasDPPRowShare()) {
1010 if (ST.hasDPP() && ST.hasGFX10Insts()) {
1020 if (ST.hasPermlane16Insts()) {
1040 if (ST.hasDsSwizzleRotateMode()) {
1053static std::optional<Instruction *>
1057 if (
DL.getTypeSizeInBits(
II.getType()) != 32)
1058 return std::nullopt;
1060 if (!ST.isWaveSizeKnown())
1061 return std::nullopt;
1063 unsigned WaveSize = ST.getWavefrontSize();
1064 bool IsBpermute =
II.getIntrinsicID() == Intrinsic::amdgcn_ds_bpermute;
1065 Value *Src =
II.getArgOperand(IsBpermute ? 1 : 0);
1066 Value *Index =
II.getArgOperand(IsBpermute ? 0 : 1);
1071 for (
unsigned Lane :
seq(WaveSize)) {
1073 if (!Val || (*Val & 3) || (*Val >> 2) >= WaveSize)
1074 return std::nullopt;
1075 Ids[Lane] = *Val >> 2;
1079 return std::nullopt;
1084 return std::nullopt;
1088std::optional<Instruction *>
1092 case Intrinsic::amdgcn_implicitarg_ptr: {
1093 if (
II.getFunction()->hasFnAttribute(
"amdgpu-no-implicitarg-ptr"))
1095 uint64_t ImplicitArgBytes = ST->getImplicitArgNumBytes(*
II.getFunction());
1098 II.getAttributes().getRetDereferenceableOrNullBytes();
1099 if (CurrentOrNullBytes != 0) {
1102 uint64_t NewBytes = std::max(CurrentOrNullBytes, ImplicitArgBytes);
1105 II.removeRetAttr(Attribute::DereferenceableOrNull);
1109 uint64_t CurrentBytes =
II.getAttributes().getRetDereferenceableBytes();
1110 uint64_t NewBytes = std::max(CurrentBytes, ImplicitArgBytes);
1111 if (NewBytes != CurrentBytes) {
1117 return std::nullopt;
1119 case Intrinsic::amdgcn_rcp: {
1120 Value *Src =
II.getArgOperand(0);
1131 if (
II.isStrictFP())
1135 const APFloat &ArgVal =
C->getValueAPF();
1153 auto IID = SrcCI->getIntrinsicID();
1158 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
1168 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
1171 II.setFastMathFlags(InnerFMF);
1173 II.setCalledFunction(NewDecl);
1179 case Intrinsic::amdgcn_sqrt:
1180 case Intrinsic::amdgcn_rsq:
1181 case Intrinsic::amdgcn_tanh: {
1182 Value *Src =
II.getArgOperand(0);
1194 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
1196 II.getModule(), Intrinsic::sqrt, {II.getType()});
1197 II.setCalledFunction(NewDecl);
1203 case Intrinsic::amdgcn_log:
1204 case Intrinsic::amdgcn_exp2: {
1205 const bool IsLog = IID == Intrinsic::amdgcn_log;
1206 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
1207 Value *Src =
II.getArgOperand(0);
1217 if (
C->isInfinity()) {
1220 if (!
C->isNegative())
1224 if (IsExp &&
C->isNegative())
1228 if (
II.isStrictFP())
1232 Constant *Quieted = ConstantFP::get(Ty,
C->getValue().makeQuiet());
1237 if (
C->isZero() || (
C->getValue().isDenormal() && Ty->isFloatTy())) {
1239 : ConstantFP::get(Ty, 1.0);
1243 if (IsLog &&
C->isNegative())
1251 case Intrinsic::amdgcn_frexp_mant:
1252 case Intrinsic::amdgcn_frexp_exp: {
1253 Value *Src =
II.getArgOperand(0);
1259 if (IID == Intrinsic::amdgcn_frexp_mant) {
1261 II, ConstantFP::get(
II.getContext(), Significand));
1281 case Intrinsic::amdgcn_class: {
1282 Value *Src0 =
II.getArgOperand(0);
1283 Value *Src1 =
II.getArgOperand(1);
1287 II.getModule(), Intrinsic::is_fpclass, Src0->
getType()));
1290 II.setArgOperand(1, ConstantInt::get(Src1->
getType(),
1311 case Intrinsic::amdgcn_cvt_pkrtz: {
1312 auto foldFPTruncToF16RTZ = [](
Value *Arg) ->
Value * {
1325 return ConstantFP::get(HalfTy, Val);
1328 Value *Src =
nullptr;
1330 if (Src->getType()->isHalfTy())
1337 if (
Value *Src0 = foldFPTruncToF16RTZ(
II.getArgOperand(0))) {
1338 if (
Value *Src1 = foldFPTruncToF16RTZ(
II.getArgOperand(1))) {
1348 case Intrinsic::amdgcn_cvt_pknorm_i16:
1349 case Intrinsic::amdgcn_cvt_pknorm_u16:
1350 case Intrinsic::amdgcn_cvt_pk_i16:
1351 case Intrinsic::amdgcn_cvt_pk_u16: {
1352 Value *Src0 =
II.getArgOperand(0);
1353 Value *Src1 =
II.getArgOperand(1);
1365 case Intrinsic::amdgcn_cvt_off_f32_i4: {
1366 Value* Arg =
II.getArgOperand(0);
1380 constexpr size_t ResValsSize = 16;
1381 static constexpr float ResVals[ResValsSize] = {
1382 0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
1383 -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
1385 ConstantFP::get(Ty, ResVals[CArg->
getZExtValue() & (ResValsSize - 1)]);
1388 case Intrinsic::amdgcn_ubfe:
1389 case Intrinsic::amdgcn_sbfe: {
1391 Value *Src =
II.getArgOperand(0);
1398 unsigned IntSize = Ty->getIntegerBitWidth();
1403 if ((Width & (IntSize - 1)) == 0) {
1408 if (Width >= IntSize) {
1410 II, 2, ConstantInt::get(CWidth->
getType(), Width & (IntSize - 1)));
1421 ConstantInt::get(COffset->
getType(),
Offset & (IntSize - 1)));
1425 bool Signed = IID == Intrinsic::amdgcn_sbfe;
1427 if (!CWidth || !COffset)
1437 if (
Offset + Width < IntSize) {
1441 RightShift->takeName(&
II);
1448 RightShift->takeName(&
II);
1451 case Intrinsic::amdgcn_exp:
1452 case Intrinsic::amdgcn_exp_row:
1453 case Intrinsic::amdgcn_exp_compr: {
1459 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
1461 for (
int I = 0;
I < (IsCompr ? 2 : 4); ++
I) {
1462 if ((!IsCompr && (EnBits & (1 <<
I)) == 0) ||
1463 (IsCompr && ((EnBits & (0x3 << (2 *
I))) == 0))) {
1464 Value *Src =
II.getArgOperand(
I + 2);
1478 case Intrinsic::amdgcn_fmed3: {
1479 Value *Src0 =
II.getArgOperand(0);
1480 Value *Src1 =
II.getArgOperand(1);
1481 Value *Src2 =
II.getArgOperand(2);
1483 for (
Value *Src : {Src0, Src1, Src2}) {
1488 if (
II.isStrictFP())
1525 const APFloat *ConstSrc0 =
nullptr;
1526 const APFloat *ConstSrc1 =
nullptr;
1527 const APFloat *ConstSrc2 =
nullptr;
1532 const bool IsPosInfinity = ConstSrc0 && ConstSrc0->
isPosInfinity();
1552 const bool IsPosInfinity = ConstSrc1 && ConstSrc1->
isPosInfinity();
1575 auto *Quieted = ConstantFP::get(
II.getType(), ConstSrc2->
makeQuiet());
1595 CI->copyFastMathFlags(&
II);
1621 II.setArgOperand(0, Src0);
1622 II.setArgOperand(1, Src1);
1623 II.setArgOperand(2, Src2);
1633 ConstantFP::get(
II.getType(), Result));
1638 if (!ST->hasMed3_16())
1647 IID, {
X->getType()}, {
X,
Y, Z}, &
II,
II.getName());
1655 case Intrinsic::amdgcn_icmp:
1656 case Intrinsic::amdgcn_fcmp: {
1660 bool IsInteger = IID == Intrinsic::amdgcn_icmp;
1667 Value *Src0 =
II.getArgOperand(0);
1668 Value *Src1 =
II.getArgOperand(1);
1695 II.setArgOperand(0, Src1);
1696 II.setArgOperand(1, Src0);
1698 2, ConstantInt::get(CC->
getType(),
static_cast<int>(SwapPred)));
1745 ? Intrinsic::amdgcn_fcmp
1746 : Intrinsic::amdgcn_icmp;
1751 unsigned Width = CmpType->getBitWidth();
1752 unsigned NewWidth = Width;
1760 else if (Width <= 32)
1762 else if (Width <= 64)
1767 if (Width != NewWidth) {
1777 }
else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
1780 Value *Args[] = {SrcLHS, SrcRHS,
1781 ConstantInt::get(CC->
getType(), SrcPred)};
1783 NewIID, {
II.getType(), SrcLHS->
getType()}, Args);
1790 case Intrinsic::amdgcn_mbcnt_hi:
1795 case Intrinsic::amdgcn_mbcnt_lo: {
1808 if (std::optional<ConstantRange> ExistingRange =
II.getRange()) {
1809 ComputedRange = ComputedRange.
intersectWith(*ExistingRange);
1810 if (ComputedRange == *ExistingRange)
1814 II.addRangeRetAttr(ComputedRange);
1817 case Intrinsic::amdgcn_ballot: {
1818 Value *Arg =
II.getArgOperand(0);
1823 if (Src->isZero()) {
1828 if (ST->isWave32() &&
II.getType()->getIntegerBitWidth() == 64) {
1835 {IC.Builder.getInt32Ty()},
1836 {II.getArgOperand(0)}),
1843 case Intrinsic::amdgcn_wavefrontsize: {
1844 if (ST->isWaveSizeKnown())
1846 II, ConstantInt::get(
II.getType(), ST->getWavefrontSize()));
1849 case Intrinsic::amdgcn_wqm_vote: {
1856 case Intrinsic::amdgcn_kill: {
1858 if (!
C || !
C->getZExtValue())
1864 case Intrinsic::amdgcn_s_sendmsg:
1865 case Intrinsic::amdgcn_s_sendmsghalt: {
1871 Value *M0Val =
II.getArgOperand(1);
1877 decodeMsg(MsgImm->getZExtValue(), MsgId, OpId, StreamId, *ST);
1879 if (!msgDoesNotUseM0(MsgId, *ST))
1883 II.dropUBImplyingAttrsAndMetadata();
1887 case Intrinsic::amdgcn_update_dpp: {
1888 Value *Old =
II.getArgOperand(0);
1893 if (BC->isNullValue() || RM->getZExtValue() != 0xF ||
1900 case Intrinsic::amdgcn_permlane16:
1901 case Intrinsic::amdgcn_permlane16_var:
1902 case Intrinsic::amdgcn_permlanex16:
1903 case Intrinsic::amdgcn_permlanex16_var: {
1905 Value *VDstIn =
II.getArgOperand(0);
1910 unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
1911 IID == Intrinsic::amdgcn_permlanex16)
1918 unsigned int BcIdx = FiIdx + 1;
1927 case Intrinsic::amdgcn_wave_shuffle:
1929 case Intrinsic::amdgcn_permlane64:
1930 case Intrinsic::amdgcn_readfirstlane:
1931 case Intrinsic::amdgcn_readlane:
1932 case Intrinsic::amdgcn_ds_bpermute: {
1934 unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1935 const Use &Src =
II.getArgOperandUse(SrcIdx);
1939 if (IID == Intrinsic::amdgcn_readlane &&
1946 if (IID == Intrinsic::amdgcn_ds_bpermute) {
1947 const Use &Lane =
II.getArgOperandUse(0);
1951 II.getModule(), Intrinsic::amdgcn_readlane,
II.getType());
1952 II.setCalledFunction(NewDecl);
1953 II.setOperand(0, Src);
1954 II.setOperand(1, NewLane);
1959 if (IID == Intrinsic::amdgcn_ds_bpermute)
1965 return std::nullopt;
1967 case Intrinsic::amdgcn_writelane: {
1971 return std::nullopt;
1973 case Intrinsic::amdgcn_trig_preop: {
1976 if (!
II.getType()->isDoubleTy())
1979 Value *Src =
II.getArgOperand(0);
1980 Value *Segment =
II.getArgOperand(1);
1989 if (StrippedSign != Src)
1992 if (
II.isStrictFP())
2014 unsigned Shift = SegmentVal * 53;
2019 static const uint32_t TwoByPi[] = {
2020 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
2021 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
2022 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
2023 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
2024 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
2025 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
2029 unsigned Idx = Shift >> 5;
2030 if (Idx + 2 >= std::size(TwoByPi)) {
2035 unsigned BShift = Shift & 0x1f;
2039 Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
2043 int Scale = -53 - Shift;
2050 case Intrinsic::amdgcn_fmul_legacy: {
2051 Value *Op0 =
II.getArgOperand(0);
2052 Value *Op1 =
II.getArgOperand(1);
2054 for (
Value *Src : {Op0, Op1}) {
2075 case Intrinsic::amdgcn_fma_legacy: {
2076 Value *Op0 =
II.getArgOperand(0);
2077 Value *Op1 =
II.getArgOperand(1);
2078 Value *Op2 =
II.getArgOperand(2);
2080 for (
Value *Src : {Op0, Op1, Op2}) {
2102 II.getModule(), Intrinsic::fma,
II.getType()));
2107 case Intrinsic::amdgcn_is_shared:
2108 case Intrinsic::amdgcn_is_private: {
2109 Value *Src =
II.getArgOperand(0);
2119 case Intrinsic::amdgcn_make_buffer_rsrc: {
2120 Value *Src =
II.getArgOperand(0);
2123 return std::nullopt;
2125 case Intrinsic::amdgcn_raw_buffer_store_format:
2126 case Intrinsic::amdgcn_struct_buffer_store_format:
2127 case Intrinsic::amdgcn_raw_tbuffer_store:
2128 case Intrinsic::amdgcn_struct_tbuffer_store:
2129 case Intrinsic::amdgcn_image_store_1d:
2130 case Intrinsic::amdgcn_image_store_1darray:
2131 case Intrinsic::amdgcn_image_store_2d:
2132 case Intrinsic::amdgcn_image_store_2darray:
2133 case Intrinsic::amdgcn_image_store_2darraymsaa:
2134 case Intrinsic::amdgcn_image_store_2dmsaa:
2135 case Intrinsic::amdgcn_image_store_3d:
2136 case Intrinsic::amdgcn_image_store_cube:
2137 case Intrinsic::amdgcn_image_store_mip_1d:
2138 case Intrinsic::amdgcn_image_store_mip_1darray:
2139 case Intrinsic::amdgcn_image_store_mip_2d:
2140 case Intrinsic::amdgcn_image_store_mip_2darray:
2141 case Intrinsic::amdgcn_image_store_mip_3d:
2142 case Intrinsic::amdgcn_image_store_mip_cube: {
2147 if (ST->hasDefaultComponentBroadcast())
2149 else if (ST->hasDefaultComponentZero())
2154 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID()) ? 1 : -1;
2162 case Intrinsic::amdgcn_prng_b32: {
2163 auto *Src =
II.getArgOperand(0);
2167 return std::nullopt;
2169 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
2170 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
2171 Value *Src0 =
II.getArgOperand(0);
2172 Value *Src1 =
II.getArgOperand(1);
2178 auto getFormatNumRegs = [](
unsigned FormatVal) {
2179 switch (FormatVal) {
2193 bool MadeChange =
false;
2194 unsigned Src0NumElts = getFormatNumRegs(CBSZ);
2195 unsigned Src1NumElts = getFormatNumRegs(BLGP);
2199 if (Src0Ty->getNumElements() > Src0NumElts) {
2206 if (Src1Ty->getNumElements() > Src1NumElts) {
2214 return std::nullopt;
2225 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
2226 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
2227 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
2228 Value *Src0 =
II.getArgOperand(1);
2229 Value *Src1 =
II.getArgOperand(3);
2235 bool MadeChange =
false;
2241 if (Src0Ty->getNumElements() > Src0NumElts) {
2248 if (Src1Ty->getNumElements() > Src1NumElts) {
2256 return std::nullopt;
2273 return std::nullopt;
2286 int DMaskIdx,
bool IsLoad) {
2289 :
II.getOperand(0)->getType());
2290 unsigned VWidth = IIVTy->getNumElements();
2293 Type *EltTy = IIVTy->getElementType();
2305 const unsigned UnusedComponentsAtFront = DemandedElts.
countr_zero();
2310 DemandedElts = (1 << ActiveBits) - 1;
2312 if (UnusedComponentsAtFront > 0) {
2313 static const unsigned InvalidOffsetIdx = 0xf;
2316 switch (
II.getIntrinsicID()) {
2317 case Intrinsic::amdgcn_raw_buffer_load:
2318 case Intrinsic::amdgcn_raw_ptr_buffer_load:
2321 case Intrinsic::amdgcn_s_buffer_load:
2325 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
2326 OffsetIdx = InvalidOffsetIdx;
2330 case Intrinsic::amdgcn_struct_buffer_load:
2331 case Intrinsic::amdgcn_struct_ptr_buffer_load:
2336 OffsetIdx = InvalidOffsetIdx;
2340 if (OffsetIdx != InvalidOffsetIdx) {
2342 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
2343 auto *
Offset = Args[OffsetIdx];
2344 unsigned SingleComponentSizeInBits =
2346 unsigned OffsetAdd =
2347 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
2348 auto *OffsetAddVal = ConstantInt::get(
Offset->getType(), OffsetAdd);
2365 unsigned NewDMaskVal = 0;
2366 unsigned OrigLdStIdx = 0;
2367 for (
unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
2368 const unsigned Bit = 1 << SrcIdx;
2369 if (!!(DMaskVal & Bit)) {
2370 if (!!DemandedElts[OrigLdStIdx])
2376 if (DMaskVal != NewDMaskVal)
2377 Args[DMaskIdx] = ConstantInt::get(DMask->
getType(), NewDMaskVal);
2380 unsigned NewNumElts = DemandedElts.
popcount();
2384 if (NewNumElts >= VWidth && DemandedElts.
isMask()) {
2386 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
2398 OverloadTys[0] = NewTy;
2402 for (
unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
2403 if (DemandedElts[OrigStoreIdx])
2406 if (NewNumElts == 1)
2413 II.getIntrinsicID(), OverloadTys, Args);
2416 AttributeList OldAttrList =
II.getAttributes();
2420 if (NewNumElts == 1) {
2426 unsigned NewLoadIdx = 0;
2427 for (
unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
2428 if (!!DemandedElts[OrigLoadIdx])
2444 APInt &UndefElts)
const {
2449 const unsigned FirstElt = DemandedElts.
countr_zero();
2451 const unsigned MaskLen = LastElt - FirstElt + 1;
2453 unsigned OldNumElts = VT->getNumElements();
2454 if (MaskLen == OldNumElts && MaskLen != 1)
2457 Type *EltTy = VT->getElementType();
2465 Value *Src =
II.getArgOperand(0);
2470 II.getOperandBundlesAsDefs(OpBundles);
2487 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2488 if (DemandedElts[FirstElt +
I])
2489 ExtractMask[
I] = FirstElt +
I;
2498 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2499 if (DemandedElts[FirstElt +
I])
2500 InsertMask[FirstElt +
I] =
I;
2512 SimplifyAndSetOp)
const {
2513 switch (
II.getIntrinsicID()) {
2514 case Intrinsic::amdgcn_readfirstlane:
2515 SimplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
2517 case Intrinsic::amdgcn_raw_buffer_load:
2518 case Intrinsic::amdgcn_raw_ptr_buffer_load:
2519 case Intrinsic::amdgcn_raw_buffer_load_format:
2520 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
2521 case Intrinsic::amdgcn_raw_tbuffer_load:
2522 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
2523 case Intrinsic::amdgcn_s_buffer_load:
2524 case Intrinsic::amdgcn_struct_buffer_load:
2525 case Intrinsic::amdgcn_struct_ptr_buffer_load:
2526 case Intrinsic::amdgcn_struct_buffer_load_format:
2527 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
2528 case Intrinsic::amdgcn_struct_tbuffer_load:
2529 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
2532 if (getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID())) {
2538 return std::nullopt;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Value * createPermlane16(IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi)
Emit v_permlane16 with the precomputed lane-select halves.
static std::optional< unsigned > matchRowSharePattern(ArrayRef< uint8_t > Ids)
Match a row-share pattern: all 16 lanes of each row read the same source lane.
static bool matchMirrorPattern(ArrayRef< uint8_t > Ids)
Match an N-lane reversal (mirror) pattern.
static bool tryBuildShuffleMap(Value *Index, const GCNSubtarget &ST, SmallVectorImpl< uint8_t > &Ids, const DataLayout &DL)
Build the per-lane shuffle map by evaluating Index for every lane in the wave.
static std::optional< unsigned > matchQuadPermPattern(ArrayRef< uint8_t > Ids)
Match a 4-lane (quad) permutation, encoded as the v_mov_b32_dpp QUAD_PERM control word: bits[1:0]=Ids...
static std::optional< unsigned > matchDsSwizzleRotatePattern(ArrayRef< uint8_t > Ids)
Match a GFX9+ DS_SWIZZLE rotate-mode permutation: a cyclic left-rotation of all 32 lanes within each ...
static std::optional< unsigned > matchHalfRowPermPattern(ArrayRef< uint8_t > Ids)
Match an 8-lane arbitrary permutation, encoded as the v_mov_b32_dpp8 24-bit selector (three bits per ...
static std::optional< unsigned > matchRowXMaskPattern(ArrayRef< uint8_t > Ids)
Match an XOR mask pattern within each 16-lane row: Ids[J] == Mask ^ J, with Mask in [1,...
static constexpr auto matchHalfRowMirrorPattern
static Value * createPermlaneX16(IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi)
Emit v_permlanex16 with the precomputed lane-select halves.
static bool isRowPattern(ArrayRef< uint8_t > Ids)
Match an N-lane row pattern: each lane in [0, N) reads from a source lane in the same N-lane row,...
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
static bool isTriviallyUniform(const Use &U)
Return true if we can easily prove that use U is uniform.
static CallInst * rewriteCall(IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
static constexpr auto isFullRowPattern
static constexpr auto isQuadPattern
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
static uint64_t computePermlane16Masks(ArrayRef< uint8_t > Ids)
Pack a 16-lane permutation into a single 64-bit value: four bits per output lane, lane J in bits [J*4...
static bool matchHalfWaveSwapPattern(ArrayRef< uint8_t > Ids)
Match a half-wave swap: lane J reads from lane J ^ 32.
static bool hasPeriodicLayout(ArrayRef< uint8_t > Ids)
Lanes are partitioned into groups of Period; each group is a translated copy of the first: Ids[I] = I...
static std::optional< Instruction * > tryOptimizeShufflePattern(InstCombiner &IC, IntrinsicInst &II, const GCNSubtarget &ST)
Try to fold a wave_shuffle/ds_bpermute whose lane index is a constant function of the lane ID into a ...
static constexpr auto isHalfRowPattern
static APInt defaultComponentBroadcast(Value *V)
static std::optional< unsigned > matchDsSwizzleBitmaskPattern(ArrayRef< uint8_t > Ids)
Match a DS_SWIZZLE bitmask-mode permutation: dst_lane = ((src_lane & AND) | OR) ^ XOR with each mask ...
static Value * createDsSwizzle(IRBuilderBase &B, Value *Val, unsigned Offset, const DataLayout &DL)
Emit ds_swizzle with the given immediate, bitcasting/converting between pointer/float types and i32 a...
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
static Value * matchShuffleToHWIntrinsic(IRBuilderBase &B, Value *Src, ArrayRef< uint8_t > Ids, const GCNSubtarget &ST, const DataLayout &DL)
Given a shuffle map, try to emit the best hardware intrinsic.
static std::optional< unsigned > matchRowRotatePattern(ArrayRef< uint8_t > Ids)
Match a 16-lane cyclic rotation; returns the rotation amount in [1, 15].
static bool isCrossRowPattern(ArrayRef< uint8_t > Ids)
Match a cross-row permutation suitable for v_permlanex16: every lane in the low 16-lane half reads fr...
static bool isThreadID(const GCNSubtarget &ST, Value *V)
static Value * createUpdateDpp(IRBuilderBase &B, Value *Val, unsigned Ctrl)
Emit v_mov_b32_dpp with the given control word, row/bank masks 0xF, and bound_ctrl=1 so out-of-bounds...
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
static Value * createMovDpp8(IRBuilderBase &B, Value *Val, unsigned Selector)
Emit v_mov_b32_dpp8 with the given 24-bit lane selector.
static Value * matchFPExtFromF16(Value *Arg)
Match an fpext from half to float, or a constant we can convert.
static constexpr auto matchFullRowMirrorPattern
static std::optional< unsigned > evalLaneExpr(Value *V, unsigned Lane, const GCNSubtarget &ST, const DataLayout &DL, unsigned Depth=0)
Evaluate V as a function of the lane ID and return its value on Lane, or std::nullopt if V is not a c...
static Value * createPermlane64(IRBuilderBase &B, Value *Val)
Emit v_permlane64 (swap of the two 32-lane halves of a wave64).
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
Provides some synthesis utilities to produce sequences of values.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static constexpr roundingMode rmTowardZero
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus divide(const APFloat &RHS, roundingMode RM)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool bitwiseIsEqual(const APFloat &RHS) const
bool isPosInfinity() const
const fltSemantics & getSemantics() const
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
APInt bitcastToAPInt() const
bool isNegInfinity() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
uint64_t getZExtValue() const
Get zero extended value.
unsigned popcount() const
Count the number of bits set.
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isMask(unsigned numBits) const
Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
size_t size() const
Get the array size.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool isTypeLegal(Type *Ty) const override
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
void setAttributes(AttributeList A)
Set the attributes for this call.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isFPPredicate() const
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI ConstantRange intersectWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the intersection of this range with another range.
This is an important base class in LLVM.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Tagged union holding either a T or a Error.
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Common base class shared among various IRBuilders.
LLVM_ABI CallInst * CreateIntrinsicWithoutFolding(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
ConstantInt * getTrue()
Get the constant value for i1 true.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateMaxNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the maxnum intrinsic.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
LLVM_ABI Value * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={}, function_ref< void(CallInst *)> SetFn=[](CallInst *) {})
Variant to create a possibly constant-folded intrinsic.
Value * CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maximumnum intrinsic.
Value * CreateMinNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the minnum intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minimumnum intrinsic.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateFMulFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
static Value * stripSignOnlyFPOps(Value *Val)
Ignore all operations which only change the sign of a value, returning the underlying magnitude value...
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignore it.
bool match(Val *V, const Pattern &P)
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
auto dyn_cast_or_null(const Y &Val)
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
constexpr unsigned MaxAnalysisRecursionDepth
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
@ NearestTiesToEven
roundTiesToEven.
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isConstant() const
Returns true if we know the value of all bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
SimplifyQuery getWithInstruction(const Instruction *I) const
LLVM_ABI bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.