#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-codegenprepare"

static cl::opt<bool> WidenLoads(
    "amdgpu-codegenprepare-widen-constant-loads",
    cl::desc(
        "Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"),
    cl::ReallyHidden, cl::init(false));

static cl::opt<bool> BreakLargePHIs(
    "amdgpu-codegenprepare-break-large-phis",
    cl::desc("Break large PHI nodes for DAGISel"),
    cl::ReallyHidden, cl::init(true));

static cl::opt<bool> ForceBreakLargePHIs(
    "amdgpu-codegenprepare-force-break-large-phis",
    cl::desc("For testing purposes, always break large "
             "PHIs even if it isn't profitable."),
    cl::ReallyHidden, cl::init(false));

static cl::opt<unsigned> BreakLargePHIsThreshold(
    "amdgpu-codegenprepare-break-large-phis-threshold",
    cl::desc("Minimum type size in bits for breaking large PHI nodes"),
    cl::ReallyHidden, cl::init(32));

static cl::opt<bool> UseMul24Intrin(
    "amdgpu-codegenprepare-mul24",
    cl::desc("Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"),
    cl::ReallyHidden, cl::init(true));

static cl::opt<bool> ExpandDiv64InIR(
    "amdgpu-codegenprepare-expand-div64",
    cl::desc("Expand 64-bit division in AMDGPUCodeGenPrepare"),
    cl::ReallyHidden, cl::init(false));

static cl::opt<bool> DisableIDivExpand(
    "amdgpu-codegenprepare-disable-idiv-expansion",
    cl::desc("Prevent expanding integer division in AMDGPUCodeGenPrepare"),
    cl::ReallyHidden, cl::init(false));

static cl::opt<bool> DisableFDivExpand(
    "amdgpu-codegenprepare-disable-fdiv-expansion",
    cl::desc("Prevent expanding floating point division in AMDGPUCodeGenPrepare"),
    cl::ReallyHidden, cl::init(false));
class AMDGPUCodeGenPrepareImpl
    : public InstVisitor<AMDGPUCodeGenPrepareImpl, bool> {
public:
  const bool HasFP32DenormalFlush;
  bool FlowChanged = false;
  mutable Function *SqrtF32 = nullptr;
  mutable Function *LdexpF32 = nullptr;

      : F(F), ST(TM.getSubtarget<GCNSubtarget>(F)), TM(TM), TLI(TLI), AC(AC),
        DT(DT), UA(UA), DL(F.getDataLayout()),

    SqrtF32 = Intrinsic::getOrInsertDeclaration(
        F.getParent(), Intrinsic::amdgcn_sqrt, {Type::getFloatTy(Ctx)});

    LdexpF32 = Intrinsic::getOrInsertDeclaration(
        F.getParent(), Intrinsic::ldexp,
        {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
  bool canBreakPHINode(const PHINode &I);

  bool isLegalFloatingTy(const Type *T) const;

  bool canIgnoreDenormalInput(const Value *V, const Instruction *CtxI) const {
    return HasFP32DenormalFlush ||

  unsigned numBitsUnsigned(Value *Op) const;
  unsigned numBitsSigned(Value *Op) const;

  unsigned getDivNumBits(BinaryOperator &I, Value *Num, Value *Den,
                         unsigned MaxDivBits, bool Signed) const;

  Value *expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I, Value *Num,
                        Value *Den, bool IsDiv, bool IsSigned) const;
  Value *expandDivRem24Impl(IRBuilder<> &Builder, BinaryOperator &I,
                            Value *Num, Value *Den, unsigned DivBits,
                            bool IsDiv, bool IsSigned) const;

  bool canWidenScalarExtLoad(LoadInst &I) const;

  Value *optimizeWithFDivFast(IRBuilder<> &Builder, Value *Num, Value *Den,
                              float ReqdAccuracy) const;
  Value *visitFDivElement(IRBuilder<> &Builder, Value *Num, Value *Den,
                          FastMathFlags DivFMF, FastMathFlags SqrtFMF,
                          Value *RsqOp, const Instruction *FDivInst,
                          float ReqdAccuracy) const;

  std::pair<Value *, Value *> getFrexpResults(IRBuilder<> &Builder,
                                              Value *Src) const;

  Value *emitRcpIEEE1ULP(IRBuilder<> &Builder, Value *Src,
                         bool IsNegative) const;

    if (!ExpandDiv64InIR)

  StringRef getPassName() const override { return "AMDGPU IR optimizations"; }
bool AMDGPUCodeGenPrepareImpl::run() {
  BreakPhiNodesCache.clear();
  bool MadeChange = false;

  while (!DeadVals.empty()) {
bool AMDGPUCodeGenPrepareImpl::isSigned(const BinaryOperator &I) const {
  return I.getOpcode() == Instruction::AShr ||
         I.getOpcode() == Instruction::SDiv ||
         I.getOpcode() == Instruction::SRem;
}

bool AMDGPUCodeGenPrepareImpl::isSigned(const SelectInst &I) const {

bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const {
bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
  Type *Ty = I.getType();
  int TySize = DL.getTypeSizeInBits(Ty);
  Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty);

  return I.isSimple() && TySize < 32 && Alignment >= 4 && UA.isUniform(&I);
}

unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const {

unsigned AMDGPUCodeGenPrepareImpl::numBitsSigned(Value *Op) const {
static void extractValues(IRBuilder<> &Builder,
                          SmallVectorImpl<Value *> &Values, Value *V) {

    for (int I = 0, E = VT->getNumElements(); I != E; ++I)
      Values.push_back(Builder.CreateExtractElement(V, I));

static Value *insertValues(IRBuilder<> &Builder, Type *Ty,
                           SmallVectorImpl<Value *> &Values) {
  if (!Ty->isVectorTy()) {

  for (int I = 0, E = Values.size(); I != E; ++I)
    NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);
bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
  if (I.getOpcode() != Instruction::Mul)

  Type *Ty = I.getType();

  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  unsigned LHSBits = 0, RHSBits = 0;
  bool IsSigned = false;

  if (ST.hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
      (RHSBits = numBitsUnsigned(RHS)) <= 24) {
  } else if (ST.hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
             (RHSBits = numBitsSigned(RHS)) <= 24) {

  IntegerType *I32Ty = Builder.getInt32Ty();
  IntegerType *IntrinTy = Size > 32 ? Builder.getInt64Ty() : I32Ty;
  Type *DstTy = LHSVals[0]->getType();

  for (int I = 0, E = LHSVals.size(); I != E; ++I) {
    Value *LHS = IsSigned ? Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty)
                          : Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
    Value *RHS = IsSigned ? Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty)
                          : Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
        IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
    Result = IsSigned ? Builder.CreateSExtOrTrunc(Result, DstTy)
                      : Builder.CreateZExtOrTrunc(Result, DstTy);

  I.replaceAllUsesWith(NewVal);
  DeadVals.push_back(&I);
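// Note (added illustration, not an upstream comment): for a mul whose
// operands are known to fit in 24 bits, the rewrite above produces roughly
//
//   %r = mul i32 %a, %b
//     ==>
//   %r = call i32 @llvm.amdgcn.mul.u24(i32 %a, i32 %b)
//
// with @llvm.amdgcn.mul.i24 used instead when the signed bit counts matched,
// and vector types scalarized through LHSVals/RHSVals before recombining.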
bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const {

  if (!CBO || !CT || !CF)

  Builder.setFastMathFlags(FPOp->getFastMathFlags());

  DeadVals.push_back(&BO);
  DeadVals.push_back(CastOp);
  DeadVals.push_back(Sel);
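// Note (added summary, wording is ours): when one binop operand is a constant
// (CBO) and the other is a select between two constants (CT/CF, possibly seen
// through a cast, CastOp), both arms constant-fold, so the binop collapses
// into a select of the two folded constants and the original select, cast and
// binop become dead.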
std::pair<Value *, Value *>
AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
                                          Value *Src) const {
  Type *Ty = Src->getType();

                     : Builder.CreateExtractValue(Frexp, {1});
  return {FrexpMant, FrexpExp};
}

Value *AMDGPUCodeGenPrepareImpl::emitRcpIEEE1ULP(IRBuilder<> &Builder,
                                                 Value *Src,
                                                 bool IsNegative) const {
  auto [FrexpMant, FrexpExp] = getFrexpResults(Builder, Src);

  return Builder.CreateCall(getLdexpF32(), {Rcp, ScaleFactor});
}

Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS,
                                              Value *RHS,
                                              FastMathFlags FMF) const {
  auto [FrexpMantRHS, FrexpExpRHS] = getFrexpResults(Builder, RHS);

  auto [FrexpMantLHS, FrexpExpLHS] = getFrexpResults(Builder, LHS);
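// Note (added summary): with LHS = MantL * 2^ExpL and RHS = MantR * 2^ExpR,
// the quotient is (MantL / MantR) * 2^(ExpL - ExpR). The frexp mantissas are
// always well scaled, so the mantissa divide cannot overflow or flush to
// zero, and the exponent difference is reapplied afterwards with llvm.ldexp.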
Value *AMDGPUCodeGenPrepareImpl::emitSqrtIEEE2ULP(IRBuilder<> &Builder,
                                                  Value *Src,
                                                  FastMathFlags FMF) const {
  Type *Ty = Src->getType();

      Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));

  Value *InputScaleFactor =

  Value *OutputScaleFactor =

  return Builder.CreateCall(getLdexpF32(), {Sqrt, OutputScaleFactor});
}
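// Note (added summary): sqrt(x * 2^(2k)) == sqrt(x) * 2^k, so an input below
// the smallest normal is first scaled up into the normal range, the hardware
// sqrt is applied, and the result is scaled back with ldexp by half the input
// scale; inputs that are already normal select a scale factor of zero.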
/// Emit an expansion of 1.0 / sqrt(Src) good for 1ulp that supports denormals.
static Value *emitRsqIEEE1ULP(IRBuilder<> &Builder, Value *Src,
                              bool IsNegative) {
  Type *Ty = Src->getType();

  Value *NeedScale =
      Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));
  Constant *One = ConstantFP::get(Ty, 1.0);
  Constant *InputScale = ConstantFP::get(Ty, 0x1.0p+24);
  Constant *OutputScale =
      ConstantFP::get(Ty, IsNegative ? -0x1.0p+12 : 0x1.0p+12);

  Value *InputScaleFactor = Builder.CreateSelect(NeedScale, InputScale, One);

  Value *ScaledInput = Builder.CreateFMul(Src, InputScaleFactor);
  Value *Rsq = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, ScaledInput);
  Value *OutputScaleFactor = Builder.CreateSelect(
      NeedScale, OutputScale, IsNegative ? ConstantFP::get(Ty, -1.0) : One);

  return Builder.CreateFMul(Rsq, OutputScaleFactor);
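// Note (added summary): rsq(x * 2^24) == rsq(x) * 2^-12, so multiplying a
// denormal input by 2^24 moves it into the normal range and multiplying the
// result by 2^12 undoes the effect. The sign of the output scale also folds
// in the negation requested via IsNegative, avoiding a separate fneg.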
bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
                                                  FastMathFlags DivFMF,
                                                  FastMathFlags SqrtFMF) const {

Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
    IRBuilder<> &Builder, Value *Num, Value *Den, const FastMathFlags DivFMF,
    const FastMathFlags SqrtFMF, const Instruction *CtxI) const {

  bool IsNegative = false;

  IRBuilder<>::FastMathFlagGuard Guard(Builder);

      canIgnoreDenormalInput(Den, CtxI)) {

Value *AMDGPUCodeGenPrepareImpl::optimizeWithRcp(IRBuilder<> &Builder,
                                                 Value *Num, Value *Den,
                                                 FastMathFlags FMF,
                                                 const Instruction *CtxI) const {
  bool IsNegative = false;

  if (HasFP32DenormalFlush || FMF.approxFunc()) {

      return emitRcpIEEE1ULP(Builder, Src, IsNegative);

  if (HasFP32DenormalFlush || FMF.approxFunc()) {

    Value *Recip = emitRcpIEEE1ULP(Builder, Den, false);
Value *AMDGPUCodeGenPrepareImpl::optimizeWithFDivFast(
    IRBuilder<> &Builder, Value *Num, Value *Den, float ReqdAccuracy) const {
  if (ReqdAccuracy < 2.5f)

  bool NumIsOne = false;
    if (CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0))

  if (!HasFP32DenormalFlush && !NumIsOne)

  return Builder.CreateIntrinsic(Intrinsic::amdgcn_fdiv_fast, {Num, Den});
}

Value *AMDGPUCodeGenPrepareImpl::visitFDivElement(
    IRBuilder<> &Builder, Value *Num, Value *Den, FastMathFlags DivFMF,
    FastMathFlags SqrtFMF, Value *RsqOp, const Instruction *FDivInst,
    float ReqdDivAccuracy) const {
      optimizeWithRsq(Builder, Num, RsqOp, DivFMF, SqrtFMF, FDivInst);

  Value *Rcp = optimizeWithRcp(Builder, Num, Den, DivFMF, FDivInst);

  Value *FDivFast = optimizeWithFDivFast(Builder, Num, Den, ReqdDivAccuracy);

  return emitFrexpDiv(Builder, Num, Den, DivFMF);
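// Note (added summary of the call order above): a one-use sqrt denominator is
// handled with the fused rsq form first, then a reciprocal-based lowering is
// tried, then amdgcn.fdiv.fast when the required accuracy allows it (>= 2.5
// ulp per the check in optimizeWithFDivFast), and the frexp-scaled division
// is the final fallback.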
bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
  if (DisableFDivExpand)

  FastMathFlags SqrtFMF;

  Value *RsqOp = nullptr;
  if (DenII && DenII->getIntrinsicID() == Intrinsic::sqrt &&
      DenII->hasOneUse()) {
    if (canOptimizeWithRsq(SqrtOp, DivFMF, SqrtFMF))

  const bool AllowInaccurateRcp = DivFMF.approxFunc();
  if (!RsqOp && AllowInaccurateRcp)

  if (ReqdAccuracy < 1.0f)

  for (int I = 0, E = NumVals.size(); I != E; ++I) {
    Value *NumElt = NumVals[I];
    Value *DenElt = DenVals[I];
    Value *RsqDenElt = RsqOp ? RsqDenVals[I] : nullptr;

        visitFDivElement(Builder, NumElt, DenElt, DivFMF, SqrtFMF, RsqDenElt,

      NewEltInst->copyMetadata(FDiv);

    ResultVals[I] = NewElt;

  DeadVals.push_back(&FDiv);
static std::pair<Value *, Value *> getMul64(IRBuilder<> &Builder, Value *LHS,
                                            Value *RHS) {
  Value *LHS_EXT64 = Builder.CreateZExt(LHS, I64Ty);
  Value *RHS_EXT64 = Builder.CreateZExt(RHS, I64Ty);
  Value *MUL64 = Builder.CreateMul(LHS_EXT64, RHS_EXT64);
  Value *Lo = Builder.CreateTrunc(MUL64, I32Ty);
  Value *Hi = Builder.CreateLShr(MUL64, Builder.getInt64(32));
  Hi = Builder.CreateTrunc(Hi, I32Ty);
  return std::pair(Lo, Hi);
}
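// Note (added summary): the full 64-bit product of two 32-bit values is
// formed by zero-extending both operands to i64, multiplying once, and
// splitting the result into its low and high 32-bit halves. The companion
// getMulHu helper in this file returns only the high half, which is what the
// reciprocal-based division expansion below needs.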
unsigned AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
                                                 Value *Den,
                                                 unsigned MaxDivBits,
                                                 bool IsSigned) const {

    unsigned DivBits = SSBits - RHSSignBits + 1;
    if (DivBits > MaxDivBits)

    unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
    DivBits = SSBits - SignBits + 1;

  unsigned DivBits = SSBits - RHSSignBits;
  if (DivBits > MaxDivBits)

  unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
  DivBits = SSBits - SignBits;
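// Note (added summary): SSBits is the operands' scalar size and the *SignBits
// values come from sign-bit / known-bits analysis of the numerator and
// denominator. Neither a quotient nor a remainder needs more magnitude bits
// than its operands, so SSBits minus the shared redundant bits (plus one sign
// bit in the signed case) bounds the required width; the denominator is
// checked first as a cheap early out against MaxDivBits.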
Value *AMDGPUCodeGenPrepareImpl::expandDivRem24(IRBuilder<> &Builder,
                                                BinaryOperator &I, Value *Num,
                                                Value *Den, bool IsDiv,
                                                bool IsSigned) const {
  unsigned DivBits = getDivNumBits(I, Num, Den, 24, IsSigned);

  return expandDivRem24Impl(Builder, I, Num, Den, DivBits, IsDiv, IsSigned);
}

Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
    IRBuilder<> &Builder, BinaryOperator &I, Value *Num, Value *Den,
    unsigned DivBits, bool IsDiv, bool IsSigned) const {

  ConstantInt *One = Builder.getInt32(1);

                              {FQNeg->getType()}, {FQNeg, FB, FA}, FQ);

  if (DivBits != 0 && DivBits < 32) {

    int InRegBits = 32 - DivBits;

      Res = Builder.CreateShl(Res, InRegBits);

      ConstantInt *TruncMask =
          Builder.getInt32((UINT64_C(1) << DivBits) - 1);
      Res = Builder.CreateAnd(Res, TruncMask);
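// Note (added summary): because a float carries 24 mantissa bits, operands
// that fit in 24 bits can be divided by converting to float, forming an
// approximate quotient with the hardware reciprocal, and applying a single
// fma-based correction step (the {FQNeg, FB, FA} call above). The integer
// result is then renormalized to DivBits, either by shifting into the top
// bits and back (signed) or by masking with TruncMask (unsigned).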
bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,

  if (C->getType()->getScalarSizeInBits() <= 32)

    if (BinOpDen->getOpcode() == Instruction::Shl &&

static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout DL) {
  return Builder.CreateAShr(V, Builder.getInt32(31));
}

Value *AMDGPUCodeGenPrepareImpl::expandDivRem32(IRBuilder<> &Builder,
                                                BinaryOperator &I, Value *X,
                                                Value *Y) const {
  assert(Opc == Instruction::URem || Opc == Instruction::UDiv ||
         Opc == Instruction::SRem || Opc == Instruction::SDiv);

  if (divHasSpecialOptimization(I, X, Y))

  bool IsDiv = Opc == Instruction::UDiv || Opc == Instruction::SDiv;
  bool IsSigned = Opc == Instruction::SRem || Opc == Instruction::SDiv;

  Type *Ty = X->getType();

  if (Value *Res = expandDivRem24(Builder, I, X, Y, IsDiv, IsSigned)) {

  ConstantInt *One = Builder.getInt32(1);

  Value *Sign = nullptr;

  Sign = IsDiv ? Builder.CreateXor(SignX, SignY) : SignX;
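// Note (added summary): getSign32 materializes 0 or -1 with an arithmetic
// shift by 31. The signed case is folded into the unsigned expansion by
// working on absolute values; afterwards the quotient's sign is SignX ^ SignY
// (negative iff exactly one operand was negative) while the remainder keeps
// the dividend's sign, which is why only SignX is used when IsDiv is false.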
Value *AMDGPUCodeGenPrepareImpl::shrinkDivRem64(IRBuilder<> &Builder,
                                                BinaryOperator &I, Value *Num,
                                                Value *Den) const {
  if (!ExpandDiv64InIR && divHasSpecialOptimization(I, Num, Den))

  bool IsDiv = Opc == Instruction::SDiv || Opc == Instruction::UDiv;
  bool IsSigned = Opc == Instruction::SDiv || Opc == Instruction::SRem;

  unsigned NumDivBits = getDivNumBits(I, Num, Den, 32, IsSigned);
  if (NumDivBits > 32)

  Value *Narrowed = nullptr;
  if (NumDivBits <= 24) {
    Narrowed = expandDivRem24Impl(Builder, I, Num, Den, NumDivBits,
  } else if (NumDivBits <= 32) {
    Narrowed = expandDivRem32(Builder, I, Num, Den);

void AMDGPUCodeGenPrepareImpl::expandDivRem64(BinaryOperator &I) const {
  if (Opc == Instruction::UDiv || Opc == Instruction::SDiv) {

  if (Opc == Instruction::URem || Opc == Instruction::SRem) {
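// Note (added summary): shrinkDivRem64 first asks getDivNumBits whether the
// 64-bit operands really need more than 32 bits; if not, the 24-bit or 32-bit
// expansion above is reused on the narrowed values. True 64-bit divisions are
// only expanded in IR when -amdgpu-codegenprepare-expand-div64 is set, in
// which case expandDivRem64 appears to defer to the generic
// expandDivisionUpTo64Bits / expandRemainderUpTo64Bits utilities; otherwise
// they are left for instruction selection.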
bool AMDGPUCodeGenPrepareImpl::tryNarrowMathIfNoOverflow(Instruction *I) {
  unsigned Opc = I->getOpcode();
  Type *OldType = I->getType();

  if (Opc != Instruction::Add && Opc != Instruction::Mul)
    return false;

  if (Opc != Instruction::Add && Opc != Instruction::Mul)
    llvm_unreachable("Unexpected opcode, only valid for Instruction::Add and "
                     "Instruction::Mul.");
  MaxBitsNeeded = std::max<unsigned>(bit_ceil(MaxBitsNeeded), 8);
  Type *NewType = DL.getSmallestLegalIntType(I->getContext(), MaxBitsNeeded);

  if (NewBit >= OrigBit)

  int NumOfNonConstOps = 2;
    NumOfNonConstOps = 1;

  if (NewCost >= OldCost)

  DeadVals.push_back(I);
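// Note (added summary): the known-bits analysis gives the number of bits the
// result can actually occupy; that is rounded up to a power of two of at
// least 8 and mapped to the smallest legal integer type. The rewrite only
// happens when the new width is genuinely smaller and the target cost model
// says the narrow op plus the extension back to the original type is cheaper,
// scaling the extension cost by the number of non-constant operands
// (NumOfNonConstOps).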
bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
  if (foldBinOpIntoSelect(I))

  if (UseMul24Intrin && replaceMulWithMul24(I))

  if (tryNarrowMathIfNoOverflow(&I))

  Type *Ty = I.getType();
  Value *NewDiv = nullptr;

  if ((Opc == Instruction::URem || Opc == Instruction::UDiv ||
       Opc == Instruction::SRem || Opc == Instruction::SDiv) &&
      !DisableIDivExpand) {
    Value *Num = I.getOperand(0);
    Value *Den = I.getOperand(1);

      for (unsigned N = 0, E = VT->getNumElements(); N != E; ++N) {

        if (ScalarSize <= 32) {
          NewElt = expandDivRem32(Builder, I, NumEltN, DenEltN);

          NewElt = shrinkDivRem64(Builder, I, NumEltN, DenEltN);

          NewEltI->copyIRFlags(&I);

      if (ScalarSize <= 32)
        NewDiv = expandDivRem32(Builder, I, Num, Den);

        NewDiv = shrinkDivRem64(Builder, I, Num, Den);

    I.replaceAllUsesWith(NewDiv);
    DeadVals.push_back(&I);

  if (ExpandDiv64InIR) {
    for (BinaryOperator *Div : Div64ToExpand) {
      expandDivRem64(*Div);
bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
      canWidenScalarExtLoad(I)) {

    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
      ConstantInt *Lower =

      if (Lower->isNullValue()) {
        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);

        WidenLoad->setMetadata(LLVMContext::MD_range,

    int TySize = DL.getTypeSizeInBits(I.getType());

    DeadVals.push_back(&I);
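// Note (added summary): a simple, uniform, sub-32-bit load from a constant
// address space with at least 4-byte alignment (see canWidenScalarExtLoad) is
// rewritten as a 32-bit load (WidenLoad) followed by a truncation to the
// original width. Any !range metadata is either dropped when the range starts
// at zero or re-attached in a form adjusted for the wider type.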
bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {

  Value *Fract = nullptr;
  if (Pred == FCmpInst::FCMP_UNO && TrueVal == CmpVal && IIFalse &&
      CmpVal == matchFractPat(*IIFalse)) {
    Fract = applyFractPat(Builder, CmpVal);
  } else if (Pred == FCmpInst::FCMP_ORD && FalseVal == CmpVal && IITrue &&
             CmpVal == matchFractPat(*IITrue)) {
    Fract = applyFractPat(Builder, CmpVal);

  I.replaceAllUsesWith(Fract);
  DeadVals.push_back(&I);

static bool areInSameBB(const Value *A, const Value *B) {
  return IA && IB && IA->getParent() == IB->getParent();
}
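// Note (added reading of the fold above): matchFractPat recognizes
// min(x - floor(x), <largest value below 1.0>), and the surrounding select
// appears to exist only to pass x through unchanged when it is NaN (the
// unordered/ordered compare against the same CmpVal). Since the fract
// intrinsic built by applyFractPat behaves the same way for those inputs, the
// whole select can be replaced by it.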
static bool isInterestingPHIIncomingValue(const Value *V) {
  const Value *CurVal = V;

    BitVector EltsCovered(FVT->getNumElements());

      if (!Idx || Idx->getZExtValue() >= FVT->getNumElements())

      const auto *VecSrc = IE->getOperand(0);

      EltsCovered.set(Idx->getZExtValue());

      if (EltsCovered.all())

static void collectPHINodes(const PHINode &I,
                            SmallPtrSet<const PHINode *, 8> &SeenPHIs) {
  const auto [It, Inserted] = SeenPHIs.insert(&I);

  for (const Value *Inc : I.incoming_values()) {

  for (const User *U : I.users()) {

bool AMDGPUCodeGenPrepareImpl::canBreakPHINode(const PHINode &I) {
  if (const auto It = BreakPhiNodesCache.find(&I);
      It != BreakPhiNodesCache.end())

  SmallPtrSet<const PHINode *, 8> WorkList;

  for (const PHINode *WLP : WorkList) {
    assert(BreakPhiNodesCache.count(WLP) == 0);

  const auto Threshold = (alignTo(WorkList.size() * 2, 3) / 3);
  unsigned NumBreakablePHIs = 0;
  bool CanBreak = false;
  for (const PHINode *Cur : WorkList) {
      if (++NumBreakablePHIs >= Threshold) {

  for (const PHINode *Cur : WorkList)
    BreakPhiNodesCache[Cur] = CanBreak;
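// Note (added summary): alignTo(N * 2, 3) / 3 is just ceil(2N/3), so a
// connected group of PHIs is only broken up when at least two thirds of them
// have "interesting" incoming values (roughly, values built up locally from
// insertelements that cover the whole vector). The verdict is cached for
// every PHI in the group so the walk is not repeated.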
  Value *&Res = SlicedVals[{BB, Inc}];

    B.SetCurrentDebugLocation(IncInst->getDebugLoc());

      Res = B.CreateShuffleVector(Inc, Mask, NewValName);

      Res = B.CreateExtractElement(Inc, Idx, NewValName);
bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
      DL.getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)

  if (!ForceBreakLargePHIs && !canBreakPHINode(I))

  std::vector<VectorSlice> Slices;

  const unsigned EltSize = DL.getTypeSizeInBits(EltTy);

  if (EltSize == 8 || EltSize == 16) {
    const unsigned SubVecSize = (32 / EltSize);

    for (unsigned End = alignDown(NumElts, SubVecSize); Idx < End;
      Slices.emplace_back(SubVecTy, Idx, SubVecSize);

  for (; Idx < NumElts; ++Idx)
    Slices.emplace_back(EltTy, Idx, 1);

  assert(Slices.size() > 1);

  B.SetCurrentDebugLocation(I.getDebugLoc());

  unsigned IncNameSuffix = 0;
  for (VectorSlice &S : Slices) {
    B.SetInsertPoint(I.getParent()->getFirstNonPHIIt());
    S.NewPHI = B.CreatePHI(S.Ty, I.getNumIncomingValues());

    for (const auto &[Idx, BB] : enumerate(I.blocks())) {
      S.NewPHI->addIncoming(S.getSlicedVal(BB, I.getIncomingValue(Idx),
                                           "largephi.extractslice" +
                                               std::to_string(IncNameSuffix++)),

  unsigned NameSuffix = 0;
  for (VectorSlice &S : Slices) {
    const auto ValName = "largephi.insertslice" + std::to_string(NameSuffix++);

      Vec = B.CreateInsertVector(FVT, Vec, S.NewPHI, S.Idx, ValName);

      Vec = B.CreateInsertElement(Vec, S.NewPHI, S.Idx, ValName);

  I.replaceAllUsesWith(Vec);
  DeadVals.push_back(&I);
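// Note (added summary): vectors of i8/i16 elements are cut into 32-bit
// sub-vector slices (4 x i8 or 2 x i16) and anything left over, plus wider
// element types, gets one scalar slice per element. Each slice receives its
// own PHI whose incoming values are produced per predecessor by getSlicedVal
// (shufflevector for sub-vectors, extractelement for scalars), and the
// original wide value is rebuilt after the PHIs with insertvector /
// insertelement before replacing all uses.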
static bool isPtrKnownNeverNull(const Value *V, const DataLayout &DL,
                                const AMDGPUTargetMachine &TM, unsigned AS) {
      Load && Load->hasMetadata(LLVMContext::MD_nonnull))

  const auto NullVal = TM.getNullPointerValue(AS);

  assert(SrcPtrKB.getBitWidth() == DL.getPointerSizeInBits(AS));
  assert((NullVal == 0 || NullVal == -1) &&
         "don't know how to check for this null value!");
  return NullVal ? !SrcPtrKB.getMaxValue().isAllOnes() : SrcPtrKB.isNonZero();
}

bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
  if (I.getType()->isVectorTy())

  const unsigned SrcAS = I.getSrcAddressSpace();
  const unsigned DstAS = I.getDestAddressSpace();

  bool CanLower = false;

  auto *Intrin = B.CreateIntrinsic(
      I.getType(), Intrinsic::amdgcn_addrspacecast_nonnull, {I.getOperand(0)});
  I.replaceAllUsesWith(Intrin);
  DeadVals.push_back(&I);
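// Note (added summary): a cast between flat and local/private pointers
// normally needs a null check because the null pointer value differs between
// address spaces (0 or -1, as the assert above documents). When the source
// pointer is provably never null (nonnull load metadata, or known bits that
// exclude that address space's null value), the cast is lowered to
// llvm.amdgcn.addrspacecast.nonnull so the backend can emit the cheaper,
// checkless form.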
bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
  switch (I.getIntrinsicID()) {
  case Intrinsic::minnum:
  case Intrinsic::minimumnum:
  case Intrinsic::minimum:
    return visitFMinLike(I);
  case Intrinsic::sqrt:
    return visitSqrt(I);

Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) {
  if (IID != Intrinsic::minnum && IID != Intrinsic::minimum &&
      IID != Intrinsic::minimumnum)

  Type *Ty = I.getType();

  Value *Arg0 = I.getArgOperand(0);
  Value *Arg1 = I.getArgOperand(1);

  One.convert(C->getSemantics(), APFloat::rmNearestTiesToEven, &LosesInfo);

  for (unsigned I = 0, E = FractVals.size(); I != E; ++I) {
bool AMDGPUCodeGenPrepareImpl::visitFMinLike(IntrinsicInst &I) {
  Value *FractArg = matchFractPat(I);

  FastMathFlags FMF = I.getFastMathFlags();

  Value *Fract = applyFractPat(Builder, FractArg);
  I.replaceAllUsesWith(Fract);
  DeadVals.push_back(&I);

bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
  if (ReqdAccuracy < 1.0f)

  bool CanTreatAsDAZ = canIgnoreDenormalInput(SrcVal, &Sqrt);

  if (!CanTreatAsDAZ && ReqdAccuracy < 2.0f)

  for (int I = 0, E = SrcVals.size(); I != E; ++I) {
      ResultVals[I] = Builder.CreateCall(getSqrtF32(), SrcVals[I]);

      ResultVals[I] = emitSqrtIEEE2ULP(Builder, SrcVals[I], SqrtFMF);

  DeadVals.push_back(&Sqrt);
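// Note (added summary of visitSqrt): the rewrite is driven by the !fpmath
// accuracy on the call. Anything demanding better than 1 ulp is left alone;
// if denormal inputs can be ignored (canIgnoreDenormalInput) each element
// becomes a direct llvm.amdgcn.sqrt call, otherwise at least a 2 ulp budget is
// required and emitSqrtIEEE2ULP's scaled expansion is used instead.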
bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();

  const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
  const TargetLibraryInfo *TLI =
      &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  AssumptionCache *AC =
      &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
  const UniformityInfo &UA =
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
  return AMDGPUCodeGenPrepareImpl(F, TM, TLI, AC, DT, UA).run();
}

  AMDGPUCodeGenPrepareImpl Impl(F, ATM, TLI, AC, DT, UA);

  if (!Impl.FlowChanged)

                    "AMDGPU IR optimizations", false, false)

char AMDGPUCodeGenPrepare::ID = 0;

FunctionPass *llvm::createAMDGPUCodeGenPreparePass() {
  return new AMDGPUCodeGenPrepare();
}