60#define DEBUG_TYPE "expand-ir-insts"
71 cl::desc(
"fp convert instructions on integers with "
72 "more than <N> bits are expanded."));
77 cl::desc(
"div and rem instructions on integers with "
78 "more than <N> bits are expanded."));
92 return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
105 "ShiftAmt out of range; callers should handle ShiftAmt == 0");
107 Value *Bias = Builder.CreateLShr(Sign,
BitWidth - ShiftAmt,
"bias");
108 return Builder.CreateAdd(
X, Bias,
"adjusted");
124 bool IsDiv = (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv);
127 bool IsExact = IsDiv && BO->
isExact();
130 "Expected power-of-2 constant divisor");
135 unsigned BitWidth = Ty->getIntegerBitWidth();
137 APInt DivisorVal =
C->getValue();
138 bool IsNegativeDivisor = IsSigned && DivisorVal.
isNegative();
151 Result = IsNegativeDivisor ? Builder.CreateNeg(
X) :
X;
153 Result = ConstantInt::get(Ty, 0);
154 }
else if (IsSigned) {
160 X = Builder.CreateFreeze(
X,
X->getName() +
".fr");
164 Value *Quotient = Builder.CreateAShr(
165 Dividend, ShiftAmt, IsDiv && IsNegativeDivisor ?
"pre.neg" :
"shifted",
168 Result = IsNegativeDivisor ? Builder.CreateNeg(Quotient) : Quotient;
172 Value *Truncated = Builder.CreateShl(Quotient, ShiftAmt,
"truncated");
173 Result = Builder.CreateSub(
X, Truncated);
177 Result = Builder.CreateLShr(
X, ShiftAmt,
"", IsExact);
180 Result = Builder.CreateAnd(
X, ConstantInt::get(Ty, Mask));
221 static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
225 static bool canExpandType(
Type *Ty) {
232 static bool shouldExpandFremType(
const TargetLowering &TLI, EVT VT) {
233 assert(!VT.
isVector() &&
"Cannot handle vector type; must scalarize first");
235 TargetLowering::LegalizeAction::Expand;
238 static bool shouldExpandFremType(
const TargetLowering &TLI,
Type *Ty) {
247 static bool shouldExpandAnyFremType(
const TargetLowering &TLI) {
248 return any_of(ExpandableTypes,
249 [&](MVT V) {
return shouldExpandFremType(TLI, EVT(V)); });
253 assert(canExpandType(Ty) &&
"Expected supported floating point type");
257 Type *ComputeTy = Ty;
261 unsigned MaxIter = 2;
269 unsigned Precision = APFloat::semanticsPrecision(Ty->
getFltSemantics());
270 return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
286 : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
287 Bits(ConstantInt::
get(ExTy, Bits)), One(ConstantInt::
get(ExTy, 1)) {}
289 Value *createRcp(
Value *V,
const Twine &Name)
const {
292 return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);
304 Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),
306 Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {},
"ax");
309 Value *Axp = B.CreateFAdd(AxUpdate, Ay,
"axp");
310 return B.CreateSelect(Clt, Axp, AxUpdate,
"ax");
316 std::pair<Value *, Value *> buildExpAndPower(
Value *Src,
Value *NewExp,
318 const Twine &PowName)
const {
322 Type *Ty = Src->getType();
323 Type *ExTy = B.getInt32Ty();
324 Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);
325 Value *Mant = B.CreateExtractValue(Frexp, {0});
326 Value *
Exp = B.CreateExtractValue(Frexp, {1});
328 Exp = B.CreateSub(Exp, One, ExName);
329 Value *
Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);
338 void buildRemainderComputation(
Value *AxInitial,
Value *AyInitial,
Value *
X,
339 PHINode *RetPhi, FastMathFlags FMF)
const {
340 IRBuilder<>::FastMathFlagGuard Guard(B);
341 B.setFastMathFlags(FMF);
348 auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits,
"ex",
"ax");
349 auto [Ay, Ey] = buildExpAndPower(AyInitial, One,
"ey",
"ay");
354 Value *Nb = B.CreateSub(Ex, Ey,
"nb");
355 Value *Ayinv = createRcp(Ay,
"ayinv");
371 B.SetInsertPoint(LoopBB);
372 PHINode *NbIv = B.CreatePHI(Nb->
getType(), 2,
"nb_iv");
375 auto *AxPhi = B.CreatePHI(ComputeFpTy, 2,
"ax_loop_phi");
376 AxPhi->addIncoming(Ax, PreheaderBB);
378 Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);
379 AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {},
"ax_update");
380 AxPhi->addIncoming(AxPhiUpdate, LoopBB);
381 NbIv->
addIncoming(B.CreateSub(NbIv, Bits,
"nb_update"), LoopBB);
388 B.SetInsertPoint(ExitBB);
390 auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2,
"ax_exit_phi");
391 AxPhiExit->addIncoming(Ax, PreheaderBB);
392 AxPhiExit->addIncoming(AxPhi, LoopBB);
393 auto *NbExitPhi = B.CreatePHI(Nb->
getType(), 2,
"nb_exit_phi");
394 NbExitPhi->addIncoming(NbIv, LoopBB);
395 NbExitPhi->addIncoming(Nb, PreheaderBB);
397 Value *AxFinal = B.CreateLdexp(
398 AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {},
"ax");
399 AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);
404 AxFinal = B.CreateLdexp(AxFinal, Ey, {},
"ax");
405 if (ComputeFpTy != FremTy)
406 AxFinal = B.CreateFPTrunc(AxFinal, FremTy);
407 Value *Ret = B.CreateCopySign(AxFinal,
X);
416 void buildElseBranch(
Value *Ax,
Value *Ay,
Value *
X, PHINode *RetPhi)
const {
420 Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign,
X);
428 std::optional<SimplifyQuery> &SQ,
440 Ret = B.CreateSelect(XFinite, Ret, Nan);
448 IRBuilder<>::FastMathFlagGuard Guard(
B);
453 B.clearFastMathFlags();
456 Value *Trunc =
B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {});
457 Value *Neg =
B.CreateFNeg(Trunc);
459 return B.CreateFMA(Neg,
Y,
X);
463 std::optional<SimplifyQuery> &SQ)
const {
464 assert(
X->getType() == FremTy &&
Y->getType() == FremTy);
466 FastMathFlags FMF =
B.getFastMathFlags();
475 Value *Ax =
B.CreateFAbs(
X, {},
"ax");
476 Value *Ay =
B.CreateFAbs(
Y, {},
"ay");
477 if (ComputeFpTy !=
X->getType()) {
478 Ax =
B.CreateFPExt(Ax, ComputeFpTy,
"ax");
479 Ay =
B.CreateFPExt(Ay, ComputeFpTy,
"ay");
481 Value *AxAyCmp =
B.CreateFCmpOGT(Ax, Ay);
483 PHINode *RetPhi =
B.CreatePHI(FremTy, 2,
"ret");
489 Ret = handleInputCornerCases(Ret,
X,
Y, SQ, FMF.
noInfs());
496 auto SavedInsertPt =
B.GetInsertPoint();
504 FastMathFlags ComputeFMF = FMF;
508 B.SetInsertPoint(ThenBB);
509 buildRemainderComputation(Ax, Ay,
X, RetPhi, FMF);
513 B.SetInsertPoint(ElseBB);
514 buildElseBranch(Ax, Ay,
X, RetPhi);
517 B.SetInsertPoint(SavedInsertPt);
525 Type *Ty =
I.getType();
526 assert(FRemExpander::canExpandType(Ty) &&
527 "Expected supported floating point type");
535 B.setFastMathFlags(FMF);
536 B.SetCurrentDebugLocation(
I.getDebugLoc());
538 const FRemExpander Expander = FRemExpander::create(
B, Ty);
540 ? Expander.buildApproxFRem(
I.getOperand(0),
I.getOperand(1))
541 : Expander.buildFRem(
I.getOperand(0),
I.getOperand(1), SQ);
543 I.replaceAllUsesWith(Ret);
609 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
614 if (FloatVal->getType()->isHalfTy() &&
BitWidth >= 32) {
615 if (FPToI->
getOpcode() == Instruction::FPToUI) {
616 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());
617 A1 = Builder.CreateZExt(A0, IntTy);
619 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());
620 A1 = Builder.CreateSExt(A0, IntTy);
630 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
631 unsigned FloatWidth =
632 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
633 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
634 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
636 Value *ImplicitBit = ConstantInt::get(
638 Value *SignificandMask = ConstantInt::get(
643 Entry->setName(
Twine(Entry->getName(),
"fp-to-i-entry"));
649 "fp-to-i-if-check.saturate",
F, End);
654 Builder.getContext(),
"fp-to-i-if-check.exp.size",
F, End);
660 Entry->getTerminator()->eraseFromParent();
663 Builder.SetInsertPoint(Entry);
666 FloatVal = Builder.CreateFreeze(FloatVal);
669 if (FloatVal->getType()->isX86_FP80Ty())
672 Value *ARep = Builder.CreateBitCast(FloatVal, FloatIntTy);
673 Value *PosOrNeg, *Sign;
677 Sign = Builder.CreateSelectWithUnknownProfile(
682 Builder.CreateLShr(ARep, Builder.getIntN(FloatWidth, FPMantissaWidth));
683 Value *BiasedExp = Builder.CreateAnd(
684 And, Builder.getIntN(FloatWidth, (1 << ExponentWidth) - 1),
"biased.exp");
685 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
686 Value *Significand = Builder.CreateOr(Abs, ImplicitBit,
"significand");
687 Value *ZeroResultCond = Builder.CreateICmpULT(
688 BiasedExp, Builder.getIntN(FloatWidth, ExponentBias),
"exp.is.negative");
690 Value *IsNaN = Builder.CreateFCmpUNO(FloatVal, FloatVal,
"is.nan");
691 ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNaN);
693 Value *IsNeg = Builder.CreateIsNeg(ARep);
694 ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNeg);
698 ZeroResultCond, End, IsSaturating ? CheckSaturateBB : CheckExpSizeBB);
706 Builder.SetInsertPoint(CheckSaturateBB);
712 uint64_t MaxBiasedExp = (1ULL << ExponentWidth) - 1;
713 if (SaturatingBiasedExp > MaxBiasedExp)
714 SaturatingBiasedExp = MaxBiasedExp;
715 Value *Cmp3 = Builder.CreateICmpUGE(
716 BiasedExp, ConstantInt::get(FloatIntTy, SaturatingBiasedExp));
717 Value *CondBrSat = Builder.CreateCondBr(Cmp3, SaturateBB, CheckExpSizeBB);
721 LLVMContext::MD_prof,
726 Builder.SetInsertPoint(SaturateBB);
733 Saturated = Builder.CreateSelectWithUnknownProfile(
734 PosOrNeg, SignedMax, SignedMin,
"saturated");
738 Builder.CreateBr(End);
742 Builder.SetInsertPoint(CheckExpSizeBB);
743 Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(
744 BiasedExp, Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth),
745 "exp.smaller.mantissa.width");
749 Builder.CreateCondBr(ExpSmallerMantissaWidth, ExpSmallBB, ExpLargeBB);
755 Builder.SetInsertPoint(ExpSmallBB);
756 Value *Sub13 = Builder.CreateSub(
757 Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth), BiasedExp);
759 Builder.CreateZExtOrTrunc(Builder.CreateLShr(Significand, Sub13), IntTy);
761 ExpSmallRes = Builder.CreateMul(ExpSmallRes, Sign);
762 Builder.CreateBr(End);
765 Builder.SetInsertPoint(ExpLargeBB);
766 Value *Sub15 = Builder.CreateAdd(
769 FloatIntTy, -
static_cast<int64_t
>(ExponentBias + FPMantissaWidth)));
770 Value *SignificandCast = Builder.CreateZExtOrTrunc(Significand, IntTy);
771 Value *ExpLargeRes = Builder.CreateShl(
772 SignificandCast, Builder.CreateZExtOrTrunc(Sub15, IntTy));
774 ExpLargeRes = Builder.CreateMul(ExpLargeRes, Sign);
775 Builder.CreateBr(End);
778 Builder.SetInsertPoint(End, End->
begin());
779 PHINode *Retval0 = Builder.CreatePHI(FPToI->
getType(), 3 + IsSaturating);
882 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
886 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
889 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
890 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
892 bool IsSigned = IToFP->
getOpcode() == Instruction::SIToFP;
896 IntVal = Builder.CreateFreeze(IntVal);
902 IntTy = Builder.getIntNTy(
BitWidth);
903 IntVal = Builder.CreateIntCast(IntVal, IntTy, IsSigned);
907 Builder.CreateShl(Builder.getIntN(
BitWidth, 1),
908 Builder.getIntN(
BitWidth, FPMantissaWidth + 3));
912 Entry->setName(
Twine(Entry->getName(),
"itofp-entry"));
932 Entry->getTerminator()->eraseFromParent();
939 Builder.SetInsertPoint(Entry);
943 Value *CondBrEntry = Builder.CreateCondBr(Cmp, End, IfEnd);
946 LLVMContext::MD_prof,
951 Builder.SetInsertPoint(IfEnd);
954 Value *
Xor = Builder.CreateXor(Shr, IntVal);
956 Value *
Call = Builder.CreateCall(CTLZ, {IsSigned ?
Sub : IntVal, True});
957 Value *Cast = Builder.CreateTrunc(
Call, Builder.getInt32Ty());
958 int BitWidthNew = FloatWidth == 128 ?
BitWidth : 32;
959 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew,
BitWidth),
960 FloatWidth == 128 ?
Call : Cast);
961 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew,
BitWidth - 1),
962 FloatWidth == 128 ?
Call : Cast);
963 Value *Cmp3 = Builder.CreateICmpSGT(
964 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
968 Value *CondBrIfEnd = Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
971 LLVMContext::MD_prof,
976 Builder.SetInsertPoint(IfThen4);
978 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
979 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
986 LLVMContext::MD_prof,
989 llvm::MDBuilder::kUnlikelyBranchWeight,
990 llvm::MDBuilder::kUnlikelyBranchWeight}));
994 Builder.SetInsertPoint(SwBB);
996 Builder.CreateShl(IsSigned ?
Sub : IntVal, Builder.getIntN(
BitWidth, 1));
997 Builder.CreateBr(SwEpilog);
1000 Builder.SetInsertPoint(SwDefault);
1001 Value *Sub5 = Builder.CreateSub(
1002 Builder.getIntN(BitWidthNew,
BitWidth - FPMantissaWidth - 3),
1003 FloatWidth == 128 ?
Call : Cast);
1004 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
1005 Value *Shr6 = Builder.CreateLShr(IsSigned ?
Sub : IntVal,
1006 FloatWidth == 128 ? Sub5 : ShProm);
1008 Builder.CreateAdd(FloatWidth == 128 ?
Call : Cast,
1009 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
1010 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
1012 FloatWidth == 128 ? Sub8 : ShProm9);
1013 Value *
And = Builder.CreateAnd(Shr9, IsSigned ?
Sub : IntVal);
1015 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
1016 Value *
Or = Builder.CreateOr(Shr6, Conv11);
1017 Builder.CreateBr(SwEpilog);
1020 Builder.SetInsertPoint(SwEpilog);
1021 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
1025 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
1026 Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));
1027 Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));
1028 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
1029 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
1030 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(
BitWidth, 1));
1031 Value *Shr18 =
nullptr;
1033 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(
BitWidth, 2));
1035 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(
BitWidth, 2));
1036 Value *A3 = Builder.CreateAnd(Inc, Temp1,
"a3");
1037 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(
BitWidth, 0));
1038 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
1039 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(
BitWidth, 32));
1040 Value *ExtractT64 =
nullptr;
1041 if (FloatWidth > 80)
1042 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
1044 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
1047 Value *CondBrSwEpilog = Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
1050 LLVMContext::MD_prof,
1055 Builder.SetInsertPoint(IfThen20);
1056 Value *Shr21 =
nullptr;
1058 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(
BitWidth, 3));
1060 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(
BitWidth, 3));
1061 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
1062 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(
BitWidth, 32));
1063 Value *ExtractT62 =
nullptr;
1064 if (FloatWidth > 80)
1065 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());
1067 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());
1068 Builder.CreateBr(IfEnd26);
1071 Builder.SetInsertPoint(IfElse);
1072 Value *Sub24 = Builder.CreateAdd(
1073 FloatWidth == 128 ?
Call : Cast,
1075 -(
int)(
BitWidth - FPMantissaWidth - 1)));
1076 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
1077 Value *Shl26 = Builder.CreateShl(IsSigned ?
Sub : IntVal,
1078 FloatWidth == 128 ? Sub24 : ShProm25);
1079 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
1080 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(
BitWidth, 32));
1081 Value *ExtractT66 =
nullptr;
1082 if (FloatWidth > 80)
1083 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
1085 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
1086 Builder.CreateBr(IfEnd26);
1089 Builder.SetInsertPoint(IfEnd26);
1090 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
1094 PHINode *AAddr1Off32 =
nullptr;
1095 if (FloatWidth > 32) {
1097 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
1103 if (FloatWidth <= 80) {
1104 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
1109 Value *And29 =
nullptr;
1110 if (FloatWidth > 80) {
1111 Value *Temp2 = Builder.CreateShl(Builder.getIntN(
BitWidth, 1),
1113 And29 = Builder.CreateAnd(Shr, Temp2,
"and29");
1115 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());
1116 And29 = Builder.CreateAnd(
1119 unsigned TempMod = FPMantissaWidth % 32;
1120 Value *And34 =
nullptr;
1121 Value *Shl30 =
nullptr;
1122 if (FloatWidth > 80) {
1124 Value *
Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));
1125 Shl30 = Builder.CreateAdd(
1126 Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));
1127 And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());
1129 Value *
Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));
1130 Shl30 = Builder.CreateAdd(
1131 Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));
1132 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
1133 Builder.getInt32((1 << TempMod) - 1));
1135 Value *Or35 =
nullptr;
1136 if (FloatWidth > 80) {
1137 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());
1138 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
1139 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
1140 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
1141 Builder.getIntN(128, FPMantissaWidth));
1142 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
1143 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
1144 Or35 = Builder.CreateOr(Or34, A6);
1146 Value *Or31 = Builder.CreateOr(And34, And29);
1147 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
1149 Value *A4 =
nullptr;
1151 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
1152 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
1154 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
1155 Value *Or1 = Builder.CreateOr(Shl1, And1);
1156 A4 = Builder.CreateBitCast(Or1, IToFP->
getType());
1160 A4 = Builder.CreateFPTrunc(A40, IToFP->
getType());
1166 A4 = Builder.CreateFPTrunc(A40, IToFP->
getType());
1168 A4 = Builder.CreateBitCast(Or35, IToFP->
getType());
1178 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
1179 uint64_t MinInfExp = 1ULL << (ExponentWidth - 1);
1181 Value *MinInfExpVal = Builder.getIntN(BitWidthNew, MinInfExp);
1182 Value *Overflow = Builder.CreateICmpUGE(Sub2, MinInfExpVal);
1189 Inf = Builder.CreateSelectWithUnknownProfile(IsNeg, NegInf, Inf,
1192 A4 = Builder.CreateSelect(Overflow, Inf, A4);
1196 LLVMContext::MD_prof,
1200 Builder.CreateBr(End);
1203 Builder.SetInsertPoint(End, End->
begin());
1219 unsigned NumElements = VTy->getElementCount().getFixedValue();
1221 for (
unsigned Idx = 0; Idx < NumElements; ++Idx) {
1222 Value *Ext = Builder.CreateExtractElement(
I->getOperand(0), Idx);
1224 Value *NewOp =
nullptr;
1226 NewOp = Builder.CreateBinOp(
1227 BinOp->getOpcode(), Ext,
1228 Builder.CreateExtractElement(
I->getOperand(1), Idx));
1230 NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,
1231 I->getType()->getScalarType());
1233 assert(
II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1234 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1235 NewOp = Builder.CreateIntrinsic(
I->getType()->getScalarType(),
1236 II->getIntrinsicID(), {Ext});
1240 Result = Builder.CreateInsertElement(Result, NewOp, Idx);
1242 ScalarizedI->copyIRFlags(
I,
true);
1247 I->replaceAllUsesWith(Result);
1248 I->dropAllReferences();
1249 I->eraseFromParent();
1254 if (
I.getOperand(0)->getType()->isVectorTy())
1264 unsigned MaxLegalFpConvertBitWidth =
1273 bool DisableExpandLargeFp =
1275 bool DisableExpandLargeDivRem =
1277 bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);
1279 if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem)
1283 Type *Ty =
I.getType();
1285 if (Ty->isScalableTy())
1288 switch (
I.getOpcode()) {
1289 case Instruction::FRem:
1290 return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);
1291 case Instruction::FPToUI:
1292 case Instruction::FPToSI:
1293 return !DisableExpandLargeFp &&
1295 MaxLegalFpConvertBitWidth;
1296 case Instruction::UIToFP:
1297 case Instruction::SIToFP:
1298 return !DisableExpandLargeFp &&
1300 ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
1301 case Instruction::UDiv:
1302 case Instruction::SDiv:
1303 case Instruction::URem:
1304 case Instruction::SRem:
1309 return !DisableExpandLargeDivRem &&
1311 MaxLegalDivRemBitWidth;
1312 case Instruction::Call: {
1314 if (
II && (
II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1315 II->getIntrinsicID() == Intrinsic::fptosi_sat)) {
1316 return !DisableExpandLargeFp &&
1318 MaxLegalFpConvertBitWidth;
1330 if (!ShouldHandleInst(
I))
1337 while (!Worklist.
empty()) {
1340 switch (
I->getOpcode()) {
1341 case Instruction::FRem: {
1342 auto SQ = [&]() -> std::optional<SimplifyQuery> {
1344 auto Res = std::make_optional<SimplifyQuery>(
1345 I->getModule()->getDataLayout(),
I);
1356 case Instruction::FPToUI:
1359 case Instruction::FPToSI:
1363 case Instruction::UIToFP:
1364 case Instruction::SIToFP:
1368 case Instruction::UDiv:
1369 case Instruction::SDiv:
1370 case Instruction::URem:
1371 case Instruction::SRem: {
1378 unsigned Opc = BO->getOpcode();
1379 if (
Opc == Instruction::UDiv ||
Opc == Instruction::SDiv)
1386 case Instruction::Call: {
1388 assert(
II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1389 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1391 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1401class ExpandIRInstsLegacyPass :
public FunctionPass {
1408 : FunctionPass(
ID), OptLevel(OptLevel) {}
1413 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1414 const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(
F);
1415 auto *TLI = Subtarget->getTargetLowering();
1416 AssumptionCache *AC =
nullptr;
1418 const LibcallLoweringInfo &Libcalls =
1419 getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
1420 *
F.getParent(), *Subtarget);
1422 if (OptLevel != CodeGenOptLevel::None && !
F.hasOptNone())
1423 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
1424 return runImpl(
F, *TLI, Libcalls, AC);
1427 void getAnalysisUsage(AnalysisUsage &AU)
const override {
1430 if (OptLevel != CodeGenOptLevel::None)
1441 : TM(&TM), OptLevel(OptLevel) {}
1446 OS, MapClassName2PassName);
1448 OS <<
"O" << (int)OptLevel;
1465 if (!LibcallLowering) {
1467 "' analysis required");
1472 LibcallLowering->getLibcallLowering(*STI);
1478char ExpandIRInstsLegacyPass::ID = 0;
1480 "Expand certain fp instructions",
false,
false)
1486 return new ExpandIRInstsLegacyPass(OptLevel);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
static bool expandFRem(BinaryOperator &I, std::optional< SimplifyQuery > &SQ)
static void expandIToFP(Instruction *IToFP)
Generate code to convert an integer to a fp number, replacing S(U)IToFP with the generated code.
static cl::opt< unsigned > ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(IntegerType::MAX_INT_BITS), cl::desc("div and rem instructions on integers with " "more than <N> bits are expanded."))
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
static void expandPow2DivRem(BinaryOperator *BO)
Expand division or remainder by a power-of-2 constant.
static bool isSigned(unsigned Opcode)
static void addToWorklist(Instruction &I, SmallVector< Instruction *, 4 > &Worklist)
static Value * addSignedBias(IRBuilder<> &Builder, Value *X, unsigned BitWidth, unsigned ShiftAmt)
For signed div/rem by a power of 2, compute the bias-adjusted dividend: Sign = ashr X,...
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
static bool isConstantPowerOfTwo(Value *V, bool SignedOp)
static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Worklist)
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file contains the declarations for profiling metadata utility functions.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
unsigned countr_zero() const
Count the number of trailing zero bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BinaryOps getOpcode() const
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ ICMP_SGT
signed greater than
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI ExpandIRInstsPass(const TargetMachine &TM, CodeGenOptLevel OptLevel)
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Convenience struct for specifying and reasoning about fast-math flags.
void setAllowContract(bool B=true)
void setAllowReciprocal(bool B=true)
void setNoNaNs(bool B=true)
void setNoInfs(bool B=true)
FunctionPass class - This class is used to implement most global optimizations.
Module * getParent()
Get the module that this global value is contained inside of...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Tracks which library functions to use for a particular subtarget.
Record a mapping from subtarget to LibcallLoweringInfo.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
LLVM_ABI MDNode * createLikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards true destination.
LLVM_ABI MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards false destination.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
unsigned getMaxDivRemBitWidthSupported() const
Returns the size in bits of the maximum div/rem the backend supports.
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the maximum fp to/from int conversion the backend supports.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
LLVM_ABI const fltSemantics & getFltSemantics() const
void dropAllReferences()
Drop all references to operands.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI bool expandDivision(BinaryOperator *Div)
Generate code to divide two integers, replacing Div with the generated code.
cl::opt< bool > ProfcheckDisableMetadataFixes
LLVM_ABI bool isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector value has no infinities.
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI void applyProfMetadataIfEnabled(Value *V, llvm::function_ref< void(Instruction *)> setMetadataCallback)
inst_iterator inst_begin(Function *F)
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI FunctionPass * createExpandIRInstsPass(CodeGenOptLevel)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CodeGenOptLevel
Code generation optimization level.
inst_iterator inst_end(Function *F)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI bool expandRemainder(BinaryOperator *Rem)
Generate code to calculate the remainder of two integers, replacing Rem with the generated code.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
A CRTP mix-in to automatically provide informational APIs needed for passes.