22#include "llvm/IR/IntrinsicsAMDGPU.h"
29#define DEBUG_TYPE "AMDGPUtti"
33struct AMDGPUImageDMaskIntrinsic {
37#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
38#include "AMDGPUGenSearchableTables.inc"
68 Type *VTy = V.getType();
77 APFloat FloatValue(ConstFloat->getValueAPF());
78 bool LosesInfo =
true;
87 APInt IntValue(ConstInt->getValue());
106 Type *VTy = V.getType();
144 bool RemoveOldIntr = &OldIntr != &InstToReplace;
153static std::optional<Instruction *>
158 if (
const auto *LZMappingInfo =
160 if (
auto *ConstantLod =
162 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
167 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
168 Args.erase(Args.begin() + ImageDimIntr->LodIndex);
175 if (
const auto *MIPMappingInfo =
177 if (
auto *ConstantMip =
179 if (ConstantMip->isZero()) {
184 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
185 Args.erase(Args.begin() + ImageDimIntr->MipIndex);
192 if (
const auto *BiasMappingInfo =
194 if (
auto *ConstantBias =
196 if (ConstantBias->isZero()) {
201 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
202 Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
203 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
210 if (
const auto *OffsetMappingInfo =
212 if (
auto *ConstantOffset =
214 if (ConstantOffset->isZero()) {
217 OffsetMappingInfo->NoOffset, ImageDimIntr->
Dim);
219 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
220 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
227 if (ST->hasD16Images()) {
237 if (
II.hasOneUse()) {
240 if (
User->getOpcode() == Instruction::FPTrunc &&
244 [&](
auto &Args,
auto &ArgTys) {
247 ArgTys[0] = User->getType();
256 bool AllHalfExtracts =
true;
258 for (
User *U :
II.users()) {
260 if (!Ext || !Ext->hasOneUse()) {
261 AllHalfExtracts =
false;
266 if (!Tr || !Tr->getType()->isHalfTy()) {
267 AllHalfExtracts =
false;
274 if (!ExtractTruncPairs.
empty() && AllHalfExtracts) {
283 SigTys[0] = HalfVecTy;
289 II.mutateType(HalfVecTy);
290 II.setCalledFunction(HalfDecl);
293 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
294 Value *Idx = Ext->getIndexOperand();
296 Builder.SetInsertPoint(Tr);
298 Value *HalfExtract = Builder.CreateExtractElement(&
II, Idx);
301 Tr->replaceAllUsesWith(HalfExtract);
304 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
315 if (!ST->hasA16() && !ST->hasG16())
322 bool FloatCoord =
false;
324 bool OnlyDerivatives =
false;
327 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
328 Value *Coord =
II.getOperand(OperandIndex);
331 if (OperandIndex < ImageDimIntr->CoordStart ||
336 OnlyDerivatives =
true;
345 if (!OnlyDerivatives && !ST->hasA16())
346 OnlyDerivatives =
true;
349 if (!OnlyDerivatives && ImageDimIntr->
NumBiasArgs != 0) {
352 "Only image instructions with a sampler can have a bias");
354 OnlyDerivatives =
true;
357 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->
GradientStart ==
365 II,
II,
II.getIntrinsicID(), IC, [&](
auto &Args,
auto &ArgTys) {
366 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
367 if (!OnlyDerivatives) {
368 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
371 if (ImageDimIntr->NumBiasArgs != 0)
372 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
378 OperandIndex < EndIndex; OperandIndex++) {
380 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
385 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
386 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
415 Value *Src =
nullptr;
418 if (Src->getType()->isHalfTy())
435 unsigned VWidth = VTy->getNumElements();
438 for (
int i = VWidth - 1; i > 0; --i) {
460 unsigned VWidth = VTy->getNumElements();
466 SVI->getShuffleMask(ShuffleMask);
468 for (
int I = VWidth - 1;
I > 0; --
I) {
469 if (ShuffleMask.empty()) {
520 unsigned LaneArgIdx)
const {
521 unsigned MaskBits = ST->getWavefrontSizeLog2();
535 Value *LaneArg =
II.getArgOperand(LaneArgIdx);
538 if (MaskedConst != LaneArg) {
539 II.getOperandUse(LaneArgIdx).set(MaskedConst);
551 CallInst *NewCall =
B.CreateCall(&NewCallee,
Ops, OpBundles);
567 if (ST.isWave32() &&
match(V, W32Pred))
569 if (ST.isWave64() &&
match(V, W64Pred))
577static std::optional<Instruction *>
579 Value *Val =
II.getArgOperand(0);
580 Value *Idx =
II.getArgOperand(1);
584 if (!ST.isWaveSizeKnown() || !ST.hasDPPRowShare())
590 bool CanDPP16RowShare =
false;
594 uint64_t MaskCheck = (1UL << ST.getWavefrontSizeLog2()) - 1;
595 uint64_t MaskTarget = MaskCheck & 0xF0;
608 if ((Mask & MaskCheck) != MaskTarget)
612 CanDPP16RowShare =
true;
613 }
else if (
match(Idx, RowSharePred) &&
isThreadID(ST, Tid) && RowIdx < 15 &&
615 if ((Mask & MaskCheck) != MaskTarget)
618 CanDPP16RowShare =
true;
621 CanDPP16RowShare =
true;
624 if (CanDPP16RowShare) {
626 B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, Val->
getType(),
627 {PoisonValue::get(Val->getType()), Val,
628 B.getInt32(AMDGPU::DPP::ROW_SHARE0 | RowIdx),
629 B.getInt32(0xF), B.getInt32(0xF), B.getFalse()});
642 const auto IID =
II.getIntrinsicID();
643 assert(IID == Intrinsic::amdgcn_readlane ||
644 IID == Intrinsic::amdgcn_readfirstlane ||
645 IID == Intrinsic::amdgcn_permlane64);
655 const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
659 Value *LaneID =
nullptr;
661 LaneID =
II.getOperand(1);
675 const auto DoIt = [&](
unsigned OpIdx,
679 Ops.push_back(LaneID);
695 return DoIt(0,
II.getCalledFunction());
699 Type *SrcTy = Src->getType();
705 return DoIt(0, Remangled);
713 return DoIt(1,
II.getCalledFunction());
715 return DoIt(0,
II.getCalledFunction());
721std::optional<Instruction *>
725 case Intrinsic::amdgcn_implicitarg_ptr: {
726 if (
II.getFunction()->hasFnAttribute(
"amdgpu-no-implicitarg-ptr"))
728 uint64_t ImplicitArgBytes = ST->getImplicitArgNumBytes(*
II.getFunction());
731 II.getAttributes().getRetDereferenceableOrNullBytes();
732 if (CurrentOrNullBytes != 0) {
735 uint64_t NewBytes = std::max(CurrentOrNullBytes, ImplicitArgBytes);
738 II.removeRetAttr(Attribute::DereferenceableOrNull);
742 uint64_t CurrentBytes =
II.getAttributes().getRetDereferenceableBytes();
743 uint64_t NewBytes = std::max(CurrentBytes, ImplicitArgBytes);
744 if (NewBytes != CurrentBytes) {
752 case Intrinsic::amdgcn_rcp: {
753 Value *Src =
II.getArgOperand(0);
768 const APFloat &ArgVal =
C->getValueAPF();
786 auto IID = SrcCI->getIntrinsicID();
791 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
801 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
804 II.setFastMathFlags(InnerFMF);
806 II.setCalledFunction(NewDecl);
812 case Intrinsic::amdgcn_sqrt:
813 case Intrinsic::amdgcn_rsq:
814 case Intrinsic::amdgcn_tanh: {
815 Value *Src =
II.getArgOperand(0);
827 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
829 II.getModule(), Intrinsic::sqrt, {II.getType()});
830 II.setCalledFunction(NewDecl);
836 case Intrinsic::amdgcn_log:
837 case Intrinsic::amdgcn_exp2: {
838 const bool IsLog = IID == Intrinsic::amdgcn_log;
839 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
840 Value *Src =
II.getArgOperand(0);
850 if (
C->isInfinity()) {
853 if (!
C->isNegative())
857 if (IsExp &&
C->isNegative())
865 Constant *Quieted = ConstantFP::get(Ty,
C->getValue().makeQuiet());
870 if (
C->isZero() || (
C->getValue().isDenormal() && Ty->isFloatTy())) {
872 : ConstantFP::get(Ty, 1.0);
876 if (IsLog &&
C->isNegative())
884 case Intrinsic::amdgcn_frexp_mant:
885 case Intrinsic::amdgcn_frexp_exp: {
886 Value *Src =
II.getArgOperand(0);
892 if (IID == Intrinsic::amdgcn_frexp_mant) {
894 II, ConstantFP::get(
II.getContext(), Significand));
914 case Intrinsic::amdgcn_class: {
915 Value *Src0 =
II.getArgOperand(0);
916 Value *Src1 =
II.getArgOperand(1);
920 II.getModule(), Intrinsic::is_fpclass, Src0->
getType()));
923 II.setArgOperand(1, ConstantInt::get(Src1->
getType(),
944 case Intrinsic::amdgcn_cvt_pkrtz: {
945 auto foldFPTruncToF16RTZ = [](
Value *Arg) ->
Value * {
958 return ConstantFP::get(HalfTy, Val);
961 Value *Src =
nullptr;
963 if (Src->getType()->isHalfTy())
970 if (
Value *Src0 = foldFPTruncToF16RTZ(
II.getArgOperand(0))) {
971 if (
Value *Src1 = foldFPTruncToF16RTZ(
II.getArgOperand(1))) {
981 case Intrinsic::amdgcn_cvt_pknorm_i16:
982 case Intrinsic::amdgcn_cvt_pknorm_u16:
983 case Intrinsic::amdgcn_cvt_pk_i16:
984 case Intrinsic::amdgcn_cvt_pk_u16: {
985 Value *Src0 =
II.getArgOperand(0);
986 Value *Src1 =
II.getArgOperand(1);
998 case Intrinsic::amdgcn_cvt_off_f32_i4: {
999 Value* Arg =
II.getArgOperand(0);
1013 constexpr size_t ResValsSize = 16;
1014 static constexpr float ResVals[ResValsSize] = {
1015 0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
1016 -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
1018 ConstantFP::get(Ty, ResVals[CArg->
getZExtValue() & (ResValsSize - 1)]);
1021 case Intrinsic::amdgcn_ubfe:
1022 case Intrinsic::amdgcn_sbfe: {
1024 Value *Src =
II.getArgOperand(0);
1031 unsigned IntSize = Ty->getIntegerBitWidth();
1036 if ((Width & (IntSize - 1)) == 0) {
1041 if (Width >= IntSize) {
1043 II, 2, ConstantInt::get(CWidth->
getType(), Width & (IntSize - 1)));
1054 ConstantInt::get(COffset->
getType(),
Offset & (IntSize - 1)));
1058 bool Signed = IID == Intrinsic::amdgcn_sbfe;
1060 if (!CWidth || !COffset)
1070 if (
Offset + Width < IntSize) {
1074 RightShift->takeName(&
II);
1081 RightShift->takeName(&
II);
1084 case Intrinsic::amdgcn_exp:
1085 case Intrinsic::amdgcn_exp_row:
1086 case Intrinsic::amdgcn_exp_compr: {
1092 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
1094 for (
int I = 0;
I < (IsCompr ? 2 : 4); ++
I) {
1095 if ((!IsCompr && (EnBits & (1 <<
I)) == 0) ||
1096 (IsCompr && ((EnBits & (0x3 << (2 *
I))) == 0))) {
1097 Value *Src =
II.getArgOperand(
I + 2);
1111 case Intrinsic::amdgcn_fmed3: {
1112 Value *Src0 =
II.getArgOperand(0);
1113 Value *Src1 =
II.getArgOperand(1);
1114 Value *Src2 =
II.getArgOperand(2);
1116 for (
Value *Src : {Src0, Src1, Src2}) {
1121 if (
II.isStrictFP())
1158 const APFloat *ConstSrc0 =
nullptr;
1159 const APFloat *ConstSrc1 =
nullptr;
1160 const APFloat *ConstSrc2 =
nullptr;
1165 const bool IsPosInfinity = ConstSrc0 && ConstSrc0->
isPosInfinity();
1185 const bool IsPosInfinity = ConstSrc1 && ConstSrc1->
isPosInfinity();
1208 auto *Quieted = ConstantFP::get(
II.getType(), ConstSrc2->
makeQuiet());
1228 CI->copyFastMathFlags(&
II);
1254 II.setArgOperand(0, Src0);
1255 II.setArgOperand(1, Src1);
1256 II.setArgOperand(2, Src2);
1266 ConstantFP::get(
II.getType(), Result));
1271 if (!ST->hasMed3_16())
1280 IID, {
X->getType()}, {
X,
Y, Z}, &
II,
II.getName());
1288 case Intrinsic::amdgcn_icmp:
1289 case Intrinsic::amdgcn_fcmp: {
1293 bool IsInteger = IID == Intrinsic::amdgcn_icmp;
1300 Value *Src0 =
II.getArgOperand(0);
1301 Value *Src1 =
II.getArgOperand(1);
1321 II.getType(), Args);
1322 NewCall->
addFnAttr(Attribute::Convergent);
1330 II.setArgOperand(0, Src1);
1331 II.setArgOperand(1, Src0);
1333 2, ConstantInt::get(CC->
getType(),
static_cast<int>(SwapPred)));
1380 ? Intrinsic::amdgcn_fcmp
1381 : Intrinsic::amdgcn_icmp;
1386 unsigned Width = CmpType->getBitWidth();
1387 unsigned NewWidth = Width;
1395 else if (Width <= 32)
1397 else if (Width <= 64)
1402 if (Width != NewWidth) {
1412 }
else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
1415 Value *Args[] = {SrcLHS, SrcRHS,
1416 ConstantInt::get(CC->
getType(), SrcPred)};
1418 NewIID, {
II.getType(), SrcLHS->
getType()}, Args);
1425 case Intrinsic::amdgcn_mbcnt_hi:
1430 case Intrinsic::amdgcn_mbcnt_lo: {
1443 if (std::optional<ConstantRange> ExistingRange =
II.getRange()) {
1444 ComputedRange = ComputedRange.
intersectWith(*ExistingRange);
1445 if (ComputedRange == *ExistingRange)
1449 II.addRangeRetAttr(ComputedRange);
1452 case Intrinsic::amdgcn_ballot: {
1453 Value *Arg =
II.getArgOperand(0);
1458 if (Src->isZero()) {
1463 if (ST->isWave32() &&
II.getType()->getIntegerBitWidth() == 64) {
1470 {IC.Builder.getInt32Ty()},
1471 {II.getArgOperand(0)}),
1478 case Intrinsic::amdgcn_wavefrontsize: {
1479 if (ST->isWaveSizeKnown())
1481 II, ConstantInt::get(
II.getType(), ST->getWavefrontSize()));
1484 case Intrinsic::amdgcn_wqm_vote: {
1491 case Intrinsic::amdgcn_kill: {
1493 if (!
C || !
C->getZExtValue())
1499 case Intrinsic::amdgcn_s_sendmsg:
1500 case Intrinsic::amdgcn_s_sendmsghalt: {
1506 Value *M0Val =
II.getArgOperand(1);
1512 decodeMsg(MsgImm->getZExtValue(), MsgId, OpId, StreamId, *ST);
1514 if (!msgDoesNotUseM0(MsgId, *ST))
1518 II.dropUBImplyingAttrsAndMetadata();
1522 case Intrinsic::amdgcn_update_dpp: {
1523 Value *Old =
II.getArgOperand(0);
1528 if (BC->isNullValue() || RM->getZExtValue() != 0xF ||
1535 case Intrinsic::amdgcn_permlane16:
1536 case Intrinsic::amdgcn_permlane16_var:
1537 case Intrinsic::amdgcn_permlanex16:
1538 case Intrinsic::amdgcn_permlanex16_var: {
1540 Value *VDstIn =
II.getArgOperand(0);
1545 unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
1546 IID == Intrinsic::amdgcn_permlanex16)
1553 unsigned int BcIdx = FiIdx + 1;
1562 case Intrinsic::amdgcn_permlane64:
1563 case Intrinsic::amdgcn_readfirstlane:
1564 case Intrinsic::amdgcn_readlane:
1565 case Intrinsic::amdgcn_ds_bpermute: {
1567 unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1568 const Use &Src =
II.getArgOperandUse(SrcIdx);
1572 if (IID == Intrinsic::amdgcn_readlane &&
1579 if (IID == Intrinsic::amdgcn_ds_bpermute) {
1580 const Use &Lane =
II.getArgOperandUse(0);
1584 II.getModule(), Intrinsic::amdgcn_readlane,
II.getType());
1585 II.setCalledFunction(NewDecl);
1586 II.setOperand(0, Src);
1587 II.setOperand(1, NewLane);
1592 if (IID != Intrinsic::amdgcn_ds_bpermute) {
1597 return std::nullopt;
1599 case Intrinsic::amdgcn_writelane: {
1603 return std::nullopt;
1605 case Intrinsic::amdgcn_trig_preop: {
1608 if (!
II.getType()->isDoubleTy())
1611 Value *Src =
II.getArgOperand(0);
1612 Value *Segment =
II.getArgOperand(1);
1621 if (StrippedSign != Src)
1624 if (
II.isStrictFP())
1646 unsigned Shift = SegmentVal * 53;
1651 static const uint32_t TwoByPi[] = {
1652 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
1653 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
1654 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
1655 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
1656 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
1657 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
1661 unsigned Idx = Shift >> 5;
1662 if (Idx + 2 >= std::size(TwoByPi)) {
1667 unsigned BShift = Shift & 0x1f;
1671 Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
1675 int Scale = -53 - Shift;
1682 case Intrinsic::amdgcn_fmul_legacy: {
1683 Value *Op0 =
II.getArgOperand(0);
1684 Value *Op1 =
II.getArgOperand(1);
1686 for (
Value *Src : {Op0, Op1}) {
1707 case Intrinsic::amdgcn_fma_legacy: {
1708 Value *Op0 =
II.getArgOperand(0);
1709 Value *Op1 =
II.getArgOperand(1);
1710 Value *Op2 =
II.getArgOperand(2);
1712 for (
Value *Src : {Op0, Op1, Op2}) {
1734 II.getModule(), Intrinsic::fma,
II.getType()));
1739 case Intrinsic::amdgcn_is_shared:
1740 case Intrinsic::amdgcn_is_private: {
1741 Value *Src =
II.getArgOperand(0);
1751 case Intrinsic::amdgcn_make_buffer_rsrc: {
1752 Value *Src =
II.getArgOperand(0);
1755 return std::nullopt;
1757 case Intrinsic::amdgcn_raw_buffer_store_format:
1758 case Intrinsic::amdgcn_struct_buffer_store_format:
1759 case Intrinsic::amdgcn_raw_tbuffer_store:
1760 case Intrinsic::amdgcn_struct_tbuffer_store:
1761 case Intrinsic::amdgcn_image_store_1d:
1762 case Intrinsic::amdgcn_image_store_1darray:
1763 case Intrinsic::amdgcn_image_store_2d:
1764 case Intrinsic::amdgcn_image_store_2darray:
1765 case Intrinsic::amdgcn_image_store_2darraymsaa:
1766 case Intrinsic::amdgcn_image_store_2dmsaa:
1767 case Intrinsic::amdgcn_image_store_3d:
1768 case Intrinsic::amdgcn_image_store_cube:
1769 case Intrinsic::amdgcn_image_store_mip_1d:
1770 case Intrinsic::amdgcn_image_store_mip_1darray:
1771 case Intrinsic::amdgcn_image_store_mip_2d:
1772 case Intrinsic::amdgcn_image_store_mip_2darray:
1773 case Intrinsic::amdgcn_image_store_mip_3d:
1774 case Intrinsic::amdgcn_image_store_mip_cube: {
1779 if (ST->hasDefaultComponentBroadcast())
1781 else if (ST->hasDefaultComponentZero())
1786 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID()) ? 1 : -1;
1794 case Intrinsic::amdgcn_prng_b32: {
1795 auto *Src =
II.getArgOperand(0);
1799 return std::nullopt;
1801 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1802 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1803 Value *Src0 =
II.getArgOperand(0);
1804 Value *Src1 =
II.getArgOperand(1);
1810 auto getFormatNumRegs = [](
unsigned FormatVal) {
1811 switch (FormatVal) {
1825 bool MadeChange =
false;
1826 unsigned Src0NumElts = getFormatNumRegs(CBSZ);
1827 unsigned Src1NumElts = getFormatNumRegs(BLGP);
1831 if (Src0Ty->getNumElements() > Src0NumElts) {
1838 if (Src1Ty->getNumElements() > Src1NumElts) {
1846 return std::nullopt;
1857 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
1858 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1859 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
1860 Value *Src0 =
II.getArgOperand(1);
1861 Value *Src1 =
II.getArgOperand(3);
1867 bool MadeChange =
false;
1873 if (Src0Ty->getNumElements() > Src0NumElts) {
1880 if (Src1Ty->getNumElements() > Src1NumElts) {
1888 return std::nullopt;
1900 case Intrinsic::amdgcn_wave_shuffle: {
1902 return std::nullopt;
1911 return std::nullopt;
1924 int DMaskIdx,
bool IsLoad) {
1927 :
II.getOperand(0)->getType());
1928 unsigned VWidth = IIVTy->getNumElements();
1931 Type *EltTy = IIVTy->getElementType();
1943 const unsigned UnusedComponentsAtFront = DemandedElts.
countr_zero();
1948 DemandedElts = (1 << ActiveBits) - 1;
1950 if (UnusedComponentsAtFront > 0) {
1951 static const unsigned InvalidOffsetIdx = 0xf;
1954 switch (
II.getIntrinsicID()) {
1955 case Intrinsic::amdgcn_raw_buffer_load:
1956 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1959 case Intrinsic::amdgcn_s_buffer_load:
1963 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1964 OffsetIdx = InvalidOffsetIdx;
1968 case Intrinsic::amdgcn_struct_buffer_load:
1969 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1974 OffsetIdx = InvalidOffsetIdx;
1978 if (OffsetIdx != InvalidOffsetIdx) {
1980 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1981 auto *
Offset = Args[OffsetIdx];
1982 unsigned SingleComponentSizeInBits =
1984 unsigned OffsetAdd =
1985 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1986 auto *OffsetAddVal = ConstantInt::get(
Offset->getType(), OffsetAdd);
2003 unsigned NewDMaskVal = 0;
2004 unsigned OrigLdStIdx = 0;
2005 for (
unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
2006 const unsigned Bit = 1 << SrcIdx;
2007 if (!!(DMaskVal & Bit)) {
2008 if (!!DemandedElts[OrigLdStIdx])
2014 if (DMaskVal != NewDMaskVal)
2015 Args[DMaskIdx] = ConstantInt::get(DMask->
getType(), NewDMaskVal);
2018 unsigned NewNumElts = DemandedElts.
popcount();
2022 if (NewNumElts >= VWidth && DemandedElts.
isMask()) {
2024 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
2036 OverloadTys[0] = NewTy;
2040 for (
unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
2041 if (DemandedElts[OrigStoreIdx])
2044 if (NewNumElts == 1)
2056 if (NewNumElts == 1) {
2062 unsigned NewLoadIdx = 0;
2063 for (
unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
2064 if (!!DemandedElts[OrigLoadIdx])
2080 APInt &UndefElts)
const {
2085 const unsigned FirstElt = DemandedElts.
countr_zero();
2087 const unsigned MaskLen = LastElt - FirstElt + 1;
2089 unsigned OldNumElts = VT->getNumElements();
2090 if (MaskLen == OldNumElts && MaskLen != 1)
2093 Type *EltTy = VT->getElementType();
2101 Value *Src =
II.getArgOperand(0);
2106 II.getOperandBundlesAsDefs(OpBundles);
2123 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2124 if (DemandedElts[FirstElt +
I])
2125 ExtractMask[
I] = FirstElt +
I;
2134 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2135 if (DemandedElts[FirstElt +
I])
2136 InsertMask[FirstElt +
I] =
I;
2148 SimplifyAndSetOp)
const {
2149 switch (
II.getIntrinsicID()) {
2150 case Intrinsic::amdgcn_readfirstlane:
2151 SimplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
2153 case Intrinsic::amdgcn_raw_buffer_load:
2154 case Intrinsic::amdgcn_raw_ptr_buffer_load:
2155 case Intrinsic::amdgcn_raw_buffer_load_format:
2156 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
2157 case Intrinsic::amdgcn_raw_tbuffer_load:
2158 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
2159 case Intrinsic::amdgcn_s_buffer_load:
2160 case Intrinsic::amdgcn_struct_buffer_load:
2161 case Intrinsic::amdgcn_struct_ptr_buffer_load:
2162 case Intrinsic::amdgcn_struct_buffer_load_format:
2163 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
2164 case Intrinsic::amdgcn_struct_tbuffer_load:
2165 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
2168 if (getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID())) {
2174 return std::nullopt;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
static bool isTriviallyUniform(const Use &U)
Return true if we can easily prove that use U is uniform.
static CallInst * rewriteCall(IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
static APInt defaultComponentBroadcast(Value *V)
static std::optional< Instruction * > tryWaveShuffleDPP(const GCNSubtarget &ST, InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
static bool isThreadID(const GCNSubtarget &ST, Value *V)
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
static Value * matchFPExtFromF16(Value *Arg)
Match an fpext from half to float, or a constant we can convert.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
static constexpr roundingMode rmTowardZero
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus divide(const APFloat &RHS, roundingMode RM)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool isPosInfinity() const
const fltSemantics & getSemantics() const
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
APInt bitcastToAPInt() const
bool isNegInfinity() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
cmpResult compare(const APFloat &RHS) const
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
uint64_t getZExtValue() const
Get zero extended value.
unsigned popcount() const
Count the number of bits set.
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isMask(unsigned numBits) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool isTypeLegal(Type *Ty) const override
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isFPPredicate() const
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI ConstantRange intersectWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the intersection of this range with another range.
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Common base class shared among various IRBuilders.
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateMaxNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the maxnum intrinsic.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maximum intrinsic.
Value * CreateMinNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the minnum intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minimumnum intrinsic.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateFMulFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
static Value * stripSignOnlyFPOps(Value *Val)
Ignore all operations which only change the sign of a value, returning the underlying magnitude value...
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on element size.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the .td file.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
bool match(Val *V, const Pattern &P)
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
@ NearestTiesToEven
roundTiesToEven.
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isConstant() const
Returns true if we know the value of all bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
SimplifyQuery getWithInstruction(const Instruction *I) const
LLVM_ABI bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.