22#include "llvm/IR/IntrinsicsAMDGPU.h"
29#define DEBUG_TYPE "AMDGPUtti"
33struct AMDGPUImageDMaskIntrinsic {
37#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
38#include "AMDGPUGenSearchableTables.inc"
68 Type *VTy = V.getType();
77 APFloat FloatValue(ConstFloat->getValueAPF());
78 bool LosesInfo =
true;
87 APInt IntValue(ConstInt->getValue());
106 Type *VTy = V.getType();
144 bool RemoveOldIntr = &OldIntr != &InstToReplace;
153static std::optional<Instruction *>
158 if (
const auto *LZMappingInfo =
160 if (
auto *ConstantLod =
162 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
167 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
168 Args.erase(Args.begin() + ImageDimIntr->LodIndex);
175 if (
const auto *MIPMappingInfo =
177 if (
auto *ConstantMip =
179 if (ConstantMip->isZero()) {
184 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
185 Args.erase(Args.begin() + ImageDimIntr->MipIndex);
192 if (
const auto *BiasMappingInfo =
194 if (
auto *ConstantBias =
196 if (ConstantBias->isZero()) {
201 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
202 Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
203 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
210 if (
const auto *OffsetMappingInfo =
212 if (
auto *ConstantOffset =
214 if (ConstantOffset->isZero()) {
217 OffsetMappingInfo->NoOffset, ImageDimIntr->
Dim);
219 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
220 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
227 if (ST->hasD16Images()) {
237 if (
II.hasOneUse()) {
240 if (
User->getOpcode() == Instruction::FPTrunc &&
244 [&](
auto &Args,
auto &ArgTys) {
247 ArgTys[0] = User->getType();
256 bool AllHalfExtracts =
true;
258 for (
User *U :
II.users()) {
260 if (!Ext || !Ext->hasOneUse()) {
261 AllHalfExtracts =
false;
266 if (!Tr || !Tr->getType()->isHalfTy()) {
267 AllHalfExtracts =
false;
274 if (!ExtractTruncPairs.
empty() && AllHalfExtracts) {
283 SigTys[0] = HalfVecTy;
289 II.mutateType(HalfVecTy);
290 II.setCalledFunction(HalfDecl);
293 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
294 Value *Idx = Ext->getIndexOperand();
296 Builder.SetInsertPoint(Tr);
298 Value *HalfExtract = Builder.CreateExtractElement(&
II, Idx);
301 Tr->replaceAllUsesWith(HalfExtract);
304 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
315 if (!ST->hasA16() && !ST->hasG16())
322 bool FloatCoord =
false;
324 bool OnlyDerivatives =
false;
327 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
328 Value *Coord =
II.getOperand(OperandIndex);
331 if (OperandIndex < ImageDimIntr->CoordStart ||
336 OnlyDerivatives =
true;
345 if (!OnlyDerivatives && !ST->hasA16())
346 OnlyDerivatives =
true;
349 if (!OnlyDerivatives && ImageDimIntr->
NumBiasArgs != 0) {
352 "Only image instructions with a sampler can have a bias");
354 OnlyDerivatives =
true;
357 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->
GradientStart ==
365 II,
II,
II.getIntrinsicID(), IC, [&](
auto &Args,
auto &ArgTys) {
366 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
367 if (!OnlyDerivatives) {
368 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
371 if (ImageDimIntr->NumBiasArgs != 0)
372 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
378 OperandIndex < EndIndex; OperandIndex++) {
380 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
385 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
386 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
415 Value *Src =
nullptr;
418 if (Src->getType()->isHalfTy())
435 unsigned VWidth = VTy->getNumElements();
438 for (
int i = VWidth - 1; i > 0; --i) {
460 unsigned VWidth = VTy->getNumElements();
466 SVI->getShuffleMask(ShuffleMask);
468 for (
int I = VWidth - 1;
I > 0; --
I) {
469 if (ShuffleMask.empty()) {
520 unsigned LaneArgIdx)
const {
521 unsigned MaskBits = ST->getWavefrontSizeLog2();
535 Value *LaneArg =
II.getArgOperand(LaneArgIdx);
538 if (MaskedConst != LaneArg) {
539 II.getOperandUse(LaneArgIdx).set(MaskedConst);
551 CallInst *NewCall =
B.CreateCall(&NewCallee,
Ops, OpBundles);
559 const auto IID =
II.getIntrinsicID();
560 assert(IID == Intrinsic::amdgcn_readlane ||
561 IID == Intrinsic::amdgcn_readfirstlane ||
562 IID == Intrinsic::amdgcn_permlane64);
572 const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
576 Value *LaneID =
nullptr;
578 LaneID =
II.getOperand(1);
592 const auto DoIt = [&](
unsigned OpIdx,
596 Ops.push_back(LaneID);
612 return DoIt(0,
II.getCalledFunction());
616 Type *SrcTy = Src->getType();
622 return DoIt(0, Remangled);
630 return DoIt(1,
II.getCalledFunction());
632 return DoIt(0,
II.getCalledFunction());
638std::optional<Instruction *>
642 case Intrinsic::amdgcn_rcp: {
643 Value *Src =
II.getArgOperand(0);
658 const APFloat &ArgVal =
C->getValueAPF();
676 auto IID = SrcCI->getIntrinsicID();
681 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
691 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
694 II.setFastMathFlags(InnerFMF);
696 II.setCalledFunction(NewDecl);
702 case Intrinsic::amdgcn_sqrt:
703 case Intrinsic::amdgcn_rsq:
704 case Intrinsic::amdgcn_tanh: {
705 Value *Src =
II.getArgOperand(0);
717 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
719 II.getModule(), Intrinsic::sqrt, {II.getType()});
720 II.setCalledFunction(NewDecl);
726 case Intrinsic::amdgcn_log:
727 case Intrinsic::amdgcn_exp2: {
728 const bool IsLog = IID == Intrinsic::amdgcn_log;
729 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
730 Value *Src =
II.getArgOperand(0);
740 if (
C->isInfinity()) {
743 if (!
C->isNegative())
747 if (IsExp &&
C->isNegative())
755 Constant *Quieted = ConstantFP::get(Ty,
C->getValue().makeQuiet());
760 if (
C->isZero() || (
C->getValue().isDenormal() && Ty->isFloatTy())) {
762 : ConstantFP::get(Ty, 1.0);
766 if (IsLog &&
C->isNegative())
774 case Intrinsic::amdgcn_frexp_mant:
775 case Intrinsic::amdgcn_frexp_exp: {
776 Value *Src =
II.getArgOperand(0);
782 if (IID == Intrinsic::amdgcn_frexp_mant) {
784 II, ConstantFP::get(
II.getContext(), Significand));
804 case Intrinsic::amdgcn_class: {
805 Value *Src0 =
II.getArgOperand(0);
806 Value *Src1 =
II.getArgOperand(1);
810 II.getModule(), Intrinsic::is_fpclass, Src0->
getType()));
813 II.setArgOperand(1, ConstantInt::get(Src1->
getType(),
834 case Intrinsic::amdgcn_cvt_pkrtz: {
835 auto foldFPTruncToF16RTZ = [](
Value *Arg) ->
Value * {
848 return ConstantFP::get(HalfTy, Val);
851 Value *Src =
nullptr;
853 if (Src->getType()->isHalfTy())
860 if (
Value *Src0 = foldFPTruncToF16RTZ(
II.getArgOperand(0))) {
861 if (
Value *Src1 = foldFPTruncToF16RTZ(
II.getArgOperand(1))) {
871 case Intrinsic::amdgcn_cvt_pknorm_i16:
872 case Intrinsic::amdgcn_cvt_pknorm_u16:
873 case Intrinsic::amdgcn_cvt_pk_i16:
874 case Intrinsic::amdgcn_cvt_pk_u16: {
875 Value *Src0 =
II.getArgOperand(0);
876 Value *Src1 =
II.getArgOperand(1);
888 case Intrinsic::amdgcn_cvt_off_f32_i4: {
889 Value* Arg =
II.getArgOperand(0);
903 constexpr size_t ResValsSize = 16;
904 static constexpr float ResVals[ResValsSize] = {
905 0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
906 -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
908 ConstantFP::get(Ty, ResVals[CArg->
getZExtValue() & (ResValsSize - 1)]);
911 case Intrinsic::amdgcn_ubfe:
912 case Intrinsic::amdgcn_sbfe: {
914 Value *Src =
II.getArgOperand(0);
921 unsigned IntSize = Ty->getIntegerBitWidth();
926 if ((Width & (IntSize - 1)) == 0) {
931 if (Width >= IntSize) {
933 II, 2, ConstantInt::get(CWidth->
getType(), Width & (IntSize - 1)));
944 ConstantInt::get(COffset->
getType(),
Offset & (IntSize - 1)));
948 bool Signed = IID == Intrinsic::amdgcn_sbfe;
950 if (!CWidth || !COffset)
960 if (
Offset + Width < IntSize) {
964 RightShift->takeName(&
II);
971 RightShift->takeName(&
II);
974 case Intrinsic::amdgcn_exp:
975 case Intrinsic::amdgcn_exp_row:
976 case Intrinsic::amdgcn_exp_compr: {
982 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
984 for (
int I = 0;
I < (IsCompr ? 2 : 4); ++
I) {
985 if ((!IsCompr && (EnBits & (1 <<
I)) == 0) ||
986 (IsCompr && ((EnBits & (0x3 << (2 *
I))) == 0))) {
987 Value *Src =
II.getArgOperand(
I + 2);
1001 case Intrinsic::amdgcn_fmed3: {
1002 Value *Src0 =
II.getArgOperand(0);
1003 Value *Src1 =
II.getArgOperand(1);
1004 Value *Src2 =
II.getArgOperand(2);
1006 for (
Value *Src : {Src0, Src1, Src2}) {
1011 if (
II.isStrictFP())
1048 const APFloat *ConstSrc0 =
nullptr;
1049 const APFloat *ConstSrc1 =
nullptr;
1050 const APFloat *ConstSrc2 =
nullptr;
1055 const bool IsPosInfinity = ConstSrc0 && ConstSrc0->
isPosInfinity();
1075 const bool IsPosInfinity = ConstSrc1 && ConstSrc1->
isPosInfinity();
1098 auto *Quieted = ConstantFP::get(
II.getType(), ConstSrc2->
makeQuiet());
1118 CI->copyFastMathFlags(&
II);
1144 II.setArgOperand(0, Src0);
1145 II.setArgOperand(1, Src1);
1146 II.setArgOperand(2, Src2);
1156 ConstantFP::get(
II.getType(), Result));
1161 if (!ST->hasMed3_16())
1170 IID, {
X->getType()}, {
X,
Y, Z}, &
II,
II.getName());
1178 case Intrinsic::amdgcn_icmp:
1179 case Intrinsic::amdgcn_fcmp: {
1183 bool IsInteger = IID == Intrinsic::amdgcn_icmp;
1190 Value *Src0 =
II.getArgOperand(0);
1191 Value *Src1 =
II.getArgOperand(1);
1211 II.getType(), Args);
1212 NewCall->
addFnAttr(Attribute::Convergent);
1220 II.setArgOperand(0, Src1);
1221 II.setArgOperand(1, Src0);
1223 2, ConstantInt::get(CC->
getType(),
static_cast<int>(SwapPred)));
1270 ? Intrinsic::amdgcn_fcmp
1271 : Intrinsic::amdgcn_icmp;
1276 unsigned Width = CmpType->getBitWidth();
1277 unsigned NewWidth = Width;
1285 else if (Width <= 32)
1287 else if (Width <= 64)
1292 if (Width != NewWidth) {
1302 }
else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
1305 Value *Args[] = {SrcLHS, SrcRHS,
1306 ConstantInt::get(CC->
getType(), SrcPred)};
1308 NewIID, {
II.getType(), SrcLHS->
getType()}, Args);
1315 case Intrinsic::amdgcn_mbcnt_hi: {
1321 case Intrinsic::amdgcn_ballot: {
1322 Value *Arg =
II.getArgOperand(0);
1327 if (Src->isZero()) {
1332 if (ST->isWave32() &&
II.getType()->getIntegerBitWidth() == 64) {
1339 {IC.Builder.getInt32Ty()},
1340 {II.getArgOperand(0)}),
1347 case Intrinsic::amdgcn_wavefrontsize: {
1348 if (ST->isWaveSizeKnown())
1350 II, ConstantInt::get(
II.getType(), ST->getWavefrontSize()));
1353 case Intrinsic::amdgcn_wqm_vote: {
1360 case Intrinsic::amdgcn_kill: {
1362 if (!
C || !
C->getZExtValue())
1368 case Intrinsic::amdgcn_update_dpp: {
1369 Value *Old =
II.getArgOperand(0);
1374 if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
1381 case Intrinsic::amdgcn_permlane16:
1382 case Intrinsic::amdgcn_permlane16_var:
1383 case Intrinsic::amdgcn_permlanex16:
1384 case Intrinsic::amdgcn_permlanex16_var: {
1386 Value *VDstIn =
II.getArgOperand(0);
1391 unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
1392 IID == Intrinsic::amdgcn_permlanex16)
1399 unsigned int BcIdx = FiIdx + 1;
1408 case Intrinsic::amdgcn_permlane64:
1409 case Intrinsic::amdgcn_readfirstlane:
1410 case Intrinsic::amdgcn_readlane:
1411 case Intrinsic::amdgcn_ds_bpermute: {
1413 unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1414 const Use &Src =
II.getArgOperandUse(SrcIdx);
1418 if (IID == Intrinsic::amdgcn_readlane &&
1425 if (IID == Intrinsic::amdgcn_ds_bpermute) {
1426 const Use &Lane =
II.getArgOperandUse(0);
1430 II.getModule(), Intrinsic::amdgcn_readlane,
II.getType());
1431 II.setCalledFunction(NewDecl);
1432 II.setOperand(0, Src);
1433 II.setOperand(1, NewLane);
1438 if (IID != Intrinsic::amdgcn_ds_bpermute) {
1443 return std::nullopt;
1445 case Intrinsic::amdgcn_writelane: {
1449 return std::nullopt;
1451 case Intrinsic::amdgcn_trig_preop: {
1454 if (!
II.getType()->isDoubleTy())
1457 Value *Src =
II.getArgOperand(0);
1458 Value *Segment =
II.getArgOperand(1);
1463 auto *QNaN = ConstantFP::get(
1472 if (
II.isStrictFP())
1477 auto *Quieted = ConstantFP::get(
II.getType(), Fsrc.
makeQuiet());
1487 unsigned Shift = SegmentVal * 53;
1492 static const uint32_t TwoByPi[] = {
1493 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
1494 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
1495 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
1496 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
1497 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
1498 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
1502 unsigned Idx = Shift >> 5;
1503 if (Idx + 2 >= std::size(TwoByPi)) {
1508 unsigned BShift = Shift & 0x1f;
1512 Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
1516 int Scale = -53 - Shift;
1523 case Intrinsic::amdgcn_fmul_legacy: {
1524 Value *Op0 =
II.getArgOperand(0);
1525 Value *Op1 =
II.getArgOperand(1);
1527 for (
Value *Src : {Op0, Op1}) {
1548 case Intrinsic::amdgcn_fma_legacy: {
1549 Value *Op0 =
II.getArgOperand(0);
1550 Value *Op1 =
II.getArgOperand(1);
1551 Value *Op2 =
II.getArgOperand(2);
1553 for (
Value *Src : {Op0, Op1, Op2}) {
1575 II.getModule(), Intrinsic::fma,
II.getType()));
1580 case Intrinsic::amdgcn_is_shared:
1581 case Intrinsic::amdgcn_is_private: {
1582 Value *Src =
II.getArgOperand(0);
1592 case Intrinsic::amdgcn_make_buffer_rsrc: {
1593 Value *Src =
II.getArgOperand(0);
1596 return std::nullopt;
1598 case Intrinsic::amdgcn_raw_buffer_store_format:
1599 case Intrinsic::amdgcn_struct_buffer_store_format:
1600 case Intrinsic::amdgcn_raw_tbuffer_store:
1601 case Intrinsic::amdgcn_struct_tbuffer_store:
1602 case Intrinsic::amdgcn_image_store_1d:
1603 case Intrinsic::amdgcn_image_store_1darray:
1604 case Intrinsic::amdgcn_image_store_2d:
1605 case Intrinsic::amdgcn_image_store_2darray:
1606 case Intrinsic::amdgcn_image_store_2darraymsaa:
1607 case Intrinsic::amdgcn_image_store_2dmsaa:
1608 case Intrinsic::amdgcn_image_store_3d:
1609 case Intrinsic::amdgcn_image_store_cube:
1610 case Intrinsic::amdgcn_image_store_mip_1d:
1611 case Intrinsic::amdgcn_image_store_mip_1darray:
1612 case Intrinsic::amdgcn_image_store_mip_2d:
1613 case Intrinsic::amdgcn_image_store_mip_2darray:
1614 case Intrinsic::amdgcn_image_store_mip_3d:
1615 case Intrinsic::amdgcn_image_store_mip_cube: {
1620 if (ST->hasDefaultComponentBroadcast())
1622 else if (ST->hasDefaultComponentZero())
1627 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID()) ? 1 : -1;
1635 case Intrinsic::amdgcn_prng_b32: {
1636 auto *Src =
II.getArgOperand(0);
1640 return std::nullopt;
1642 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1643 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1644 Value *Src0 =
II.getArgOperand(0);
1645 Value *Src1 =
II.getArgOperand(1);
1651 auto getFormatNumRegs = [](
unsigned FormatVal) {
1652 switch (FormatVal) {
1666 bool MadeChange =
false;
1667 unsigned Src0NumElts = getFormatNumRegs(CBSZ);
1668 unsigned Src1NumElts = getFormatNumRegs(BLGP);
1672 if (Src0Ty->getNumElements() > Src0NumElts) {
1679 if (Src1Ty->getNumElements() > Src1NumElts) {
1687 return std::nullopt;
1698 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
1699 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1700 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
1701 Value *Src0 =
II.getArgOperand(1);
1702 Value *Src1 =
II.getArgOperand(3);
1708 bool MadeChange =
false;
1714 if (Src0Ty->getNumElements() > Src0NumElts) {
1721 if (Src1Ty->getNumElements() > Src1NumElts) {
1729 return std::nullopt;
1746 return std::nullopt;
1759 int DMaskIdx,
bool IsLoad) {
1762 :
II.getOperand(0)->getType());
1763 unsigned VWidth = IIVTy->getNumElements();
1766 Type *EltTy = IIVTy->getElementType();
1778 const unsigned UnusedComponentsAtFront = DemandedElts.
countr_zero();
1783 DemandedElts = (1 << ActiveBits) - 1;
1785 if (UnusedComponentsAtFront > 0) {
1786 static const unsigned InvalidOffsetIdx = 0xf;
1789 switch (
II.getIntrinsicID()) {
1790 case Intrinsic::amdgcn_raw_buffer_load:
1791 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1794 case Intrinsic::amdgcn_s_buffer_load:
1798 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1799 OffsetIdx = InvalidOffsetIdx;
1803 case Intrinsic::amdgcn_struct_buffer_load:
1804 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1809 OffsetIdx = InvalidOffsetIdx;
1813 if (OffsetIdx != InvalidOffsetIdx) {
1815 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1816 auto *
Offset = Args[OffsetIdx];
1817 unsigned SingleComponentSizeInBits =
1819 unsigned OffsetAdd =
1820 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1821 auto *OffsetAddVal = ConstantInt::get(
Offset->getType(), OffsetAdd);
1838 unsigned NewDMaskVal = 0;
1839 unsigned OrigLdStIdx = 0;
1840 for (
unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
1841 const unsigned Bit = 1 << SrcIdx;
1842 if (!!(DMaskVal & Bit)) {
1843 if (!!DemandedElts[OrigLdStIdx])
1849 if (DMaskVal != NewDMaskVal)
1850 Args[DMaskIdx] = ConstantInt::get(DMask->
getType(), NewDMaskVal);
1853 unsigned NewNumElts = DemandedElts.
popcount();
1857 if (NewNumElts >= VWidth && DemandedElts.
isMask()) {
1859 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1871 OverloadTys[0] = NewTy;
1875 for (
unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
1876 if (DemandedElts[OrigStoreIdx])
1879 if (NewNumElts == 1)
1891 if (NewNumElts == 1) {
1897 unsigned NewLoadIdx = 0;
1898 for (
unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1899 if (!!DemandedElts[OrigLoadIdx])
1915 APInt &UndefElts)
const {
1920 const unsigned FirstElt = DemandedElts.
countr_zero();
1922 const unsigned MaskLen = LastElt - FirstElt + 1;
1924 unsigned OldNumElts = VT->getNumElements();
1925 if (MaskLen == OldNumElts && MaskLen != 1)
1928 Type *EltTy = VT->getElementType();
1936 Value *Src =
II.getArgOperand(0);
1941 II.getOperandBundlesAsDefs(OpBundles);
1958 for (
unsigned I = 0;
I != MaskLen; ++
I) {
1959 if (DemandedElts[FirstElt +
I])
1960 ExtractMask[
I] = FirstElt +
I;
1969 for (
unsigned I = 0;
I != MaskLen; ++
I) {
1970 if (DemandedElts[FirstElt +
I])
1971 InsertMask[FirstElt +
I] =
I;
1983 SimplifyAndSetOp)
const {
1984 switch (
II.getIntrinsicID()) {
1985 case Intrinsic::amdgcn_readfirstlane:
1986 SimplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
1988 case Intrinsic::amdgcn_raw_buffer_load:
1989 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1990 case Intrinsic::amdgcn_raw_buffer_load_format:
1991 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
1992 case Intrinsic::amdgcn_raw_tbuffer_load:
1993 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
1994 case Intrinsic::amdgcn_s_buffer_load:
1995 case Intrinsic::amdgcn_struct_buffer_load:
1996 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1997 case Intrinsic::amdgcn_struct_buffer_load_format:
1998 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
1999 case Intrinsic::amdgcn_struct_tbuffer_load:
2000 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
2003 if (getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID())) {
2009 return std::nullopt;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
static bool isTriviallyUniform(const Use &U)
Return true if we can easily prove that use U is uniform.
static CallInst * rewriteCall(IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
static APInt defaultComponentBroadcast(Value *V)
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
static Value * matchFPExtFromF16(Value *Arg)
Match an fpext from half to float, or a constant we can convert.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
static constexpr roundingMode rmTowardZero
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus divide(const APFloat &RHS, roundingMode RM)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool isPosInfinity() const
const fltSemantics & getSemantics() const
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
APInt bitcastToAPInt() const
bool isNegInfinity() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
cmpResult compare(const APFloat &RHS) const
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
uint64_t getZExtValue() const
Get zero extended value.
unsigned popcount() const
Count the number of bits set.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isMask(unsigned numBits) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool isTypeLegal(Type *Ty) const override
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isFPPredicate() const
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Common base class shared among various IRBuilders.
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateMaxNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the maxnum intrinsic.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maximum intrinsic.
Value * CreateMinNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the minnum intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minimumnum intrinsic.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateFMulFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
bool match(Val *V, const Pattern &P)
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
class_match< ConstantFP > m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
@ NearestTiesToEven
roundTiesToEven.
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isConstant() const
Returns true if we know the value of all bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
SimplifyQuery getWithInstruction(const Instruction *I) const
LLVM_ABI bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.