LLVM 20.0.0git
ExpandLargeFpConvert.cpp
Go to the documentation of this file.
1//===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9
10// This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’,
11// ‘sitofp .. to’ instructions with a bitwidth above a threshold into
12// auto-generated functions. This is useful for targets like x86_64 that cannot
13// lower fp conversions with more than 128 bits.
14//
15//===----------------------------------------------------------------------===//
16
21#include "llvm/CodeGen/Passes.h"
25#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/PassManager.h"
29#include "llvm/Pass.h"
32
33using namespace llvm;
34
36 ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
38 cl::desc("fp convert instructions on integers with "
39 "more than <N> bits are expanded."));
40
41/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
42/// the generated code. This currently generates code similarly to compiler-rt's
43/// implementations.
44///
45/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
46/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
47/// entry:
48/// %0 = bitcast float %a to i32
49/// %conv.i = zext i32 %0 to i64
50/// %tobool.not = icmp sgt i32 %0, -1
51/// %conv = select i1 %tobool.not, i64 1, i64 -1
52/// %and = lshr i64 %conv.i, 23
53/// %shr = and i64 %and, 255
54/// %and2 = and i64 %conv.i, 8388607
55/// %or = or i64 %and2, 8388608
56/// %cmp = icmp ult i64 %shr, 127
57/// br i1 %cmp, label %cleanup, label %if.end
58///
59/// if.end: ; preds = %entry
60/// %sub = add nuw nsw i64 %shr, 4294967169
61/// %conv5 = and i64 %sub, 4294967232
62/// %cmp6.not = icmp eq i64 %conv5, 0
63/// br i1 %cmp6.not, label %if.end12, label %if.then8
64///
65/// if.then8: ; preds = %if.end
66/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
67/// br label %cleanup
68///
69/// if.end12: ; preds = %if.end
70/// %cmp13 = icmp ult i64 %shr, 150
71/// br i1 %cmp13, label %if.then15, label %if.else
72///
73/// if.then15: ; preds = %if.end12
74/// %sub16 = sub nuw nsw i64 150, %shr
75/// %shr17 = lshr i64 %or, %sub16
76/// %mul = mul nsw i64 %shr17, %conv
77/// br label %cleanup
78///
79/// if.else: ; preds = %if.end12
80/// %sub18 = add nsw i64 %shr, -150
81/// %shl = shl i64 %or, %sub18
82/// %mul19 = mul nsw i64 %shl, %conv
83/// br label %cleanup
84///
85/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
86/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
87/// ret i64 %retval.0
88/// }
89///
90/// Replace fp to integer with generated code.
91static void expandFPToI(Instruction *FPToI) {
92 IRBuilder<> Builder(FPToI);
93 auto *FloatVal = FPToI->getOperand(0);
94 IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
95
96 unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
97 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
98
99 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
100 // to i32 first following a sext/zext to target integer type.
101 Value *A1 = nullptr;
102 if (FloatVal->getType()->isHalfTy()) {
103 if (FPToI->getOpcode() == Instruction::FPToUI) {
104 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32));
105 A1 = Builder.CreateZExt(A0, IntTy);
106 } else { // FPToSI
107 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32));
108 A1 = Builder.CreateSExt(A0, IntTy);
109 }
110 FPToI->replaceAllUsesWith(A1);
111 FPToI->dropAllReferences();
112 FPToI->eraseFromParent();
113 return;
114 }
115
116 // fp80 conversion is implemented by fpext to fp128 first then do the
117 // conversion.
118 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
119 unsigned FloatWidth =
120 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
121 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
122 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
123 Value *ImplicitBit = Builder.CreateShl(
124 Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
125 Value *SignificandMask =
126 Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
127 Value *NegOne = Builder.CreateSExt(
128 ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);
129 Value *NegInf =
130 Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),
131 ConstantInt::getSigned(IntTy, BitWidth - 1));
132
133 BasicBlock *Entry = Builder.GetInsertBlock();
134 Function *F = Entry->getParent();
135 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
136 BasicBlock *End =
137 Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
138 BasicBlock *IfEnd =
139 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);
140 BasicBlock *IfThen5 =
141 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);
142 BasicBlock *IfEnd9 =
143 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);
144 BasicBlock *IfThen12 =
145 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);
146 BasicBlock *IfElse =
147 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);
148
149 Entry->getTerminator()->eraseFromParent();
150
151 // entry:
152 Builder.SetInsertPoint(Entry);
153 Value *FloatVal0 = FloatVal;
154 // fp80 conversion is implemented by fpext to fp128 first then do the
155 // conversion.
156 if (FloatVal->getType()->isX86_FP80Ty())
157 FloatVal0 =
158 Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
159 Value *ARep0 =
160 Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
161 Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
162 Value *PosOrNeg = Builder.CreateICmpSGT(
163 ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
164 Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
165 ConstantInt::getSigned(IntTy, -1));
166 Value *And =
167 Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
168 Value *And2 = Builder.CreateAnd(
169 And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
170 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
171 Value *Or = Builder.CreateOr(Abs, ImplicitBit);
172 Value *Cmp =
173 Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
174 Builder.CreateCondBr(Cmp, End, IfEnd);
175
176 // if.end:
177 Builder.SetInsertPoint(IfEnd);
178 Value *Add1 = Builder.CreateAdd(
180 IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
181 Value *Cmp3 = Builder.CreateICmpULT(
182 Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
183 Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
184
185 // if.then5:
186 Builder.SetInsertPoint(IfThen5);
187 Value *PosInf = Builder.CreateXor(NegOne, NegInf);
188 Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
189 Builder.CreateBr(End);
190
191 // if.end9:
192 Builder.SetInsertPoint(IfEnd9);
193 Value *Cmp10 = Builder.CreateICmpULT(
194 And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
195 Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
196
197 // if.then12:
198 Builder.SetInsertPoint(IfThen12);
199 Value *Sub13 = Builder.CreateSub(
200 Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
201 Value *Shr14 = Builder.CreateLShr(Or, Sub13);
202 Value *Mul = Builder.CreateMul(Shr14, Sign);
203 Builder.CreateBr(End);
204
205 // if.else:
206 Builder.SetInsertPoint(IfElse);
207 Value *Sub15 = Builder.CreateAdd(
209 IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
210 Value *Shl = Builder.CreateShl(Or, Sub15);
211 Value *Mul16 = Builder.CreateMul(Shl, Sign);
212 Builder.CreateBr(End);
213
214 // cleanup:
215 Builder.SetInsertPoint(End, End->begin());
216 PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
217
218 Retval0->addIncoming(Cond8, IfThen5);
219 Retval0->addIncoming(Mul, IfThen12);
220 Retval0->addIncoming(Mul16, IfElse);
221 Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
222
223 FPToI->replaceAllUsesWith(Retval0);
224 FPToI->dropAllReferences();
225 FPToI->eraseFromParent();
226}
227
228/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
229/// the generated code. This currently generates code similarly to compiler-rt's
230/// implementations. This implementation has an implicit assumption that integer
231/// width is larger than fp.
232///
233/// An example IR generated from compiler-rt/floatdisf.c looks like below:
234/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
235/// entry:
236/// %cmp = icmp eq i64 %a, 0
237/// br i1 %cmp, label %return, label %if.end
238///
239/// if.end: ; preds = %entry
240/// %shr = ashr i64 %a, 63
241/// %xor = xor i64 %shr, %a
242/// %sub = sub nsw i64 %xor, %shr
243/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
244/// %cast = trunc i64 %0 to i32
245/// %sub1 = sub nuw nsw i32 64, %cast
246/// %sub2 = xor i32 %cast, 63
247/// %cmp3 = icmp ult i32 %cast, 40
248/// br i1 %cmp3, label %if.then4, label %if.else
249///
250/// if.then4: ; preds = %if.end
251/// switch i32 %sub1, label %sw.default [
252/// i32 25, label %sw.bb
253/// i32 26, label %sw.epilog
254/// ]
255///
256/// sw.bb: ; preds = %if.then4
257/// %shl = shl i64 %sub, 1
258/// br label %sw.epilog
259///
260/// sw.default: ; preds = %if.then4
261/// %sub5 = sub nsw i64 38, %0
262/// %sh_prom = and i64 %sub5, 4294967295
263/// %shr6 = lshr i64 %sub, %sh_prom
264/// %shr9 = lshr i64 274877906943, %0
265/// %and = and i64 %shr9, %sub
266/// %cmp10 = icmp ne i64 %and, 0
267/// %conv11 = zext i1 %cmp10 to i64
268/// %or = or i64 %shr6, %conv11
269/// br label %sw.epilog
270///
271/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb
272/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
273/// %1 = lshr i64 %a.addr.0, 2
274/// %2 = and i64 %1, 1
275/// %or16 = or i64 %2, %a.addr.0
276/// %inc = add nsw i64 %or16, 1
277/// %3 = and i64 %inc, 67108864
278/// %tobool.not = icmp eq i64 %3, 0
279/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
280/// %spec.select = ashr i64 %inc, %spec.select.v
281/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
282/// br label %if.end26
283///
284/// if.else: ; preds = %if.end
285/// %sub23 = add nuw nsw i64 %0, 4294967256
286/// %sh_prom24 = and i64 %sub23, 4294967295
287/// %shl25 = shl i64 %sub, %sh_prom24
288/// br label %if.end26
289///
290/// if.end26: ; preds = %sw.epilog, %if.else
291/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
292/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
293/// %conv27 = trunc i64 %shr to i32
294/// %and28 = and i32 %conv27, -2147483648
295/// %add = shl nuw nsw i32 %e.0, 23
296/// %shl29 = add nuw nsw i32 %add, 1065353216
297/// %conv31 = trunc i64 %a.addr.1 to i32
298/// %and32 = and i32 %conv31, 8388607
299/// %or30 = or i32 %and32, %and28
300/// %or33 = or i32 %or30, %shl29
301/// %4 = bitcast i32 %or33 to float
302/// br label %return
303///
304/// return: ; preds = %entry, %if.end26
305/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
306/// ret float %retval.0
307/// }
308///
309/// Replace integer to fp with generated code.
310static void expandIToFP(Instruction *IToFP) {
311 IRBuilder<> Builder(IToFP);
312 auto *IntVal = IToFP->getOperand(0);
313 IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
314
315 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
316 unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
317 // fp80 conversion is implemented by conversion tp fp128 first following
318 // a fptrunc to fp80.
319 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
320 // FIXME: As there is no related builtins added in compliler-rt,
321 // here currently utilized the fp32 <-> fp16 lib calls to implement.
322 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
323 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
324 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
325 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
326
327 assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
328 "assumes integer width is larger than fp.");
329
330 Value *Temp1 =
331 Builder.CreateShl(Builder.getIntN(BitWidth, 1),
332 Builder.getIntN(BitWidth, FPMantissaWidth + 3));
333
334 BasicBlock *Entry = Builder.GetInsertBlock();
335 Function *F = Entry->getParent();
336 Entry->setName(Twine(Entry->getName(), "itofp-entry"));
337 BasicBlock *End =
338 Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
339 BasicBlock *IfEnd =
340 BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
341 BasicBlock *IfThen4 =
342 BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
343 BasicBlock *SwBB =
344 BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
345 BasicBlock *SwDefault =
346 BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
347 BasicBlock *SwEpilog =
348 BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
349 BasicBlock *IfThen20 =
350 BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
351 BasicBlock *IfElse =
352 BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
353 BasicBlock *IfEnd26 =
354 BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
355
356 Entry->getTerminator()->eraseFromParent();
357
358 Function *CTLZ =
359 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
360 ConstantInt *True = Builder.getTrue();
361
362 // entry:
363 Builder.SetInsertPoint(Entry);
364 Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
365 Builder.CreateCondBr(Cmp, End, IfEnd);
366
367 // if.end:
368 Builder.SetInsertPoint(IfEnd);
369 Value *Shr =
370 Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
371 Value *Xor = Builder.CreateXor(Shr, IntVal);
372 Value *Sub = Builder.CreateSub(Xor, Shr);
373 Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
374 Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
375 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
376 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
377 FloatWidth == 128 ? Call : Cast);
378 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
379 FloatWidth == 128 ? Call : Cast);
380 Value *Cmp3 = Builder.CreateICmpSGT(
381 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
382 Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
383
384 // if.then4:
385 Builder.SetInsertPoint(IfThen4);
386 llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
387 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
388 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
389
390 // sw.bb:
391 Builder.SetInsertPoint(SwBB);
392 Value *Shl =
393 Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
394 Builder.CreateBr(SwEpilog);
395
396 // sw.default:
397 Builder.SetInsertPoint(SwDefault);
398 Value *Sub5 = Builder.CreateSub(
399 Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
400 FloatWidth == 128 ? Call : Cast);
401 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
402 Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
403 FloatWidth == 128 ? Sub5 : ShProm);
404 Value *Sub8 =
405 Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
406 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
407 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
408 Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
409 FloatWidth == 128 ? Sub8 : ShProm9);
410 Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
411 Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
412 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
413 Value *Or = Builder.CreateOr(Shr6, Conv11);
414 Builder.CreateBr(SwEpilog);
415
416 // sw.epilog:
417 Builder.SetInsertPoint(SwEpilog);
418 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
419 AAddr0->addIncoming(Or, SwDefault);
420 AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
421 AAddr0->addIncoming(Shl, SwBB);
422 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
423 Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2));
424 Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1));
425 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
426 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
427 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
428 Value *Shr18 = nullptr;
429 if (IsSigned)
430 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
431 else
432 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
433 Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
434 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
435 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
436 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
437 Value *ExtractT64 = nullptr;
438 if (FloatWidth > 80)
439 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
440 else
441 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
442 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
443
444 // if.then20
445 Builder.SetInsertPoint(IfThen20);
446 Value *Shr21 = nullptr;
447 if (IsSigned)
448 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
449 else
450 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
451 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
452 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
453 Value *ExtractT62 = nullptr;
454 if (FloatWidth > 80)
455 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64));
456 else
457 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32));
458 Builder.CreateBr(IfEnd26);
459
460 // if.else:
461 Builder.SetInsertPoint(IfElse);
462 Value *Sub24 = Builder.CreateAdd(
463 FloatWidth == 128 ? Call : Cast,
464 ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
465 -(BitWidth - FPMantissaWidth - 1)));
466 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
467 Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
468 FloatWidth == 128 ? Sub24 : ShProm25);
469 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
470 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
471 Value *ExtractT66 = nullptr;
472 if (FloatWidth > 80)
473 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64));
474 else
475 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
476 Builder.CreateBr(IfEnd26);
477
478 // if.end26:
479 Builder.SetInsertPoint(IfEnd26);
480 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
481 AAddr1Off0->addIncoming(ExtractT, IfThen20);
482 AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
483 AAddr1Off0->addIncoming(ExtractT61, IfElse);
484 PHINode *AAddr1Off32 = nullptr;
485 if (FloatWidth > 32) {
486 AAddr1Off32 =
487 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
488 AAddr1Off32->addIncoming(ExtractT62, IfThen20);
489 AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
490 AAddr1Off32->addIncoming(ExtractT66, IfElse);
491 }
492 PHINode *E0 = nullptr;
493 if (FloatWidth <= 80) {
494 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
495 E0->addIncoming(Sub1, IfThen20);
496 E0->addIncoming(Sub2, SwEpilog);
497 E0->addIncoming(Sub2, IfElse);
498 }
499 Value *And29 = nullptr;
500 if (FloatWidth > 80) {
501 Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
502 Builder.getIntN(BitWidth, 63));
503 And29 = Builder.CreateAnd(Shr, Temp2, "and29");
504 } else {
505 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32));
506 And29 = Builder.CreateAnd(
507 Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000));
508 }
509 unsigned TempMod = FPMantissaWidth % 32;
510 Value *And34 = nullptr;
511 Value *Shl30 = nullptr;
512 if (FloatWidth > 80) {
513 TempMod += 32;
514 Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod));
515 Shl30 = Builder.CreateAdd(
516 Add,
517 Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
518 And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128));
519 } else {
520 Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod));
521 Shl30 = Builder.CreateAdd(
522 Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod));
523 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
524 Builder.getIntN(32, (1 << TempMod) - 1));
525 }
526 Value *Or35 = nullptr;
527 if (FloatWidth > 80) {
528 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128));
529 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
530 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
531 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
532 Builder.getIntN(128, FPMantissaWidth));
533 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
534 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
535 Or35 = Builder.CreateOr(Or34, A6);
536 } else {
537 Value *Or31 = Builder.CreateOr(And34, And29);
538 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
539 }
540 Value *A4 = nullptr;
541 if (IToFP->getType()->isDoubleTy()) {
542 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
543 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
544 Value *And1 =
545 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
546 Value *Or1 = Builder.CreateOr(Shl1, And1);
547 A4 = Builder.CreateBitCast(Or1, IToFP->getType());
548 } else if (IToFP->getType()->isX86_FP80Ty()) {
549 Value *A40 =
550 Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
551 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
552 } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
553 // Deal with "half" situation. This is a workaround since we don't have
554 // floattihf.c currently as referring.
555 Value *A40 =
556 Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
557 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
558 } else // float type
559 A4 = Builder.CreateBitCast(Or35, IToFP->getType());
560 Builder.CreateBr(End);
561
562 // return:
563 Builder.SetInsertPoint(End, End->begin());
564 PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
565 Retval0->addIncoming(A4, IfEnd26);
566 Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
567
568 IToFP->replaceAllUsesWith(Retval0);
569 IToFP->dropAllReferences();
570 IToFP->eraseFromParent();
571}
572
574 VectorType *VTy = cast<FixedVectorType>(I->getType());
575
576 IRBuilder<> Builder(I);
577
578 unsigned NumElements = VTy->getElementCount().getFixedValue();
579 Value *Result = PoisonValue::get(VTy);
580 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
581 Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
582 Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext,
583 I->getType()->getScalarType());
584 Result = Builder.CreateInsertElement(Result, Cast, Idx);
585 if (isa<Instruction>(Cast))
586 Replace.push_back(cast<Instruction>(Cast));
587 }
588 I->replaceAllUsesWith(Result);
589 I->dropAllReferences();
590 I->eraseFromParent();
591}
592
593static bool runImpl(Function &F, const TargetLowering &TLI) {
595 SmallVector<Instruction *, 4> ReplaceVector;
596 bool Modified = false;
597
598 unsigned MaxLegalFpConvertBitWidth =
601 MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
602
603 if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
604 return false;
605
606 for (auto &I : instructions(F)) {
607 switch (I.getOpcode()) {
608 case Instruction::FPToUI:
609 case Instruction::FPToSI: {
610 // TODO: This pass doesn't handle scalable vectors.
611 if (I.getOperand(0)->getType()->isScalableTy())
612 continue;
613
614 auto *IntTy = cast<IntegerType>(I.getType()->getScalarType());
615 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
616 continue;
617
618 if (I.getOperand(0)->getType()->isVectorTy())
619 ReplaceVector.push_back(&I);
620 else
621 Replace.push_back(&I);
622 Modified = true;
623 break;
624 }
625 case Instruction::UIToFP:
626 case Instruction::SIToFP: {
627 // TODO: This pass doesn't handle scalable vectors.
628 if (I.getOperand(0)->getType()->isScalableTy())
629 continue;
630
631 auto *IntTy =
632 cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());
633 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
634 continue;
635
636 if (I.getOperand(0)->getType()->isVectorTy())
637 ReplaceVector.push_back(&I);
638 else
639 Replace.push_back(&I);
640 Modified = true;
641 break;
642 }
643 default:
644 break;
645 }
646 }
647
648 while (!ReplaceVector.empty()) {
649 Instruction *I = ReplaceVector.pop_back_val();
650 scalarize(I, Replace);
651 }
652
653 if (Replace.empty())
654 return false;
655
656 while (!Replace.empty()) {
657 Instruction *I = Replace.pop_back_val();
658 if (I->getOpcode() == Instruction::FPToUI ||
659 I->getOpcode() == Instruction::FPToSI) {
660 expandFPToI(I);
661 } else {
662 expandIToFP(I);
663 }
664 }
665
666 return Modified;
667}
668
669namespace {
670class ExpandLargeFpConvertLegacyPass : public FunctionPass {
671public:
672 static char ID;
673
674 ExpandLargeFpConvertLegacyPass() : FunctionPass(ID) {
677 }
678
679 bool runOnFunction(Function &F) override {
680 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
681 auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
682 return runImpl(F, *TLI);
683 }
684
685 void getAnalysisUsage(AnalysisUsage &AU) const override {
689 }
690};
691} // namespace
692
// NOTE(review): this is a fragment of a function definition. The embedded
// listing numbers jump from 692 to 695 and from 695 to 698, so the signature
// and the statement(s) after the STI lookup are missing. Presumably this is
// the new-pass-manager run() entry point that forwards to runImpl(); confirm
// against upstream before relying on it.
695 const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
698}
699
// Definition of the static ID member declared in
// ExpandLargeFpConvertLegacyPass; it is passed to the FunctionPass constructor.
700char ExpandLargeFpConvertLegacyPass::ID = 0;
// Pass registration under the argument "expand-large-fp-convert". The two
// trailing `false` arguments are the macros' `cfg` and `analysis` parameters.
// Nothing is listed between BEGIN and END.
701INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
702 "Expand large fp convert", false, false)
703INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
704 "Expand large fp convert", false, false)
705
// NOTE(review): the signature line of this function (listing line 706) is
// missing. The body returns a newly allocated ExpandLargeFpConvertLegacyPass,
// so this is presumably the factory for the legacy pass; confirm its name and
// return type against upstream.
707 return new ExpandLargeFpConvertLegacyPass();
708}
Expand Atomic instructions
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
bool End
Definition: ELF_riscv.cpp:480
static bool runImpl(Function &F, const TargetLowering &TLI)
static void expandIToFP(Instruction *IToFP)
Generate code to convert a fp number to integer, replacing S(U)IToFP with the generated code.
static void expandFPToI(Instruction *FPToI)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Replace)
expand large fp convert
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(llvm::IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
static bool runImpl(Function &F, const TargetLowering &TLI)
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:21
This is the interface for a simple mod/ref and alias analysis over globals.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
BinaryOperator * Mul
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
static Constant * getZero(Type *Ty, bool Negative=false)
Definition: Constants.cpp:1038
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:124
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Legacy wrapper pass to provide the GlobalsAAResult object.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2265
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2480
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2468
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:536
Value * CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2273
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2109
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:463
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
Value * CreateFPToUI(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2075
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2041
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1442
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2253
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2405
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1148
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2249
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1349
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2135
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:494
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1125
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1421
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2029
LLVMContext & getContext() const
Definition: IRBuilder.h:173
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1480
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1332
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2015
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1502
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1119
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2169
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2420
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1461
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2118
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1524
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1366
Value * CreateFPToSI(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2082
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2674
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
Class to represent integer types.
Definition: DerivedTypes.h:40
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Definition: DerivedTypes.h:52
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:98
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1852
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
bool empty() const
Definition: SmallVector.h:95
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
Multiway switch.
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the largest fp convert the backend supports.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition: Type.h:159
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:145
static Type * getFP128Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
static Type * getFloatTy(LLVMContext &C)
void dropAllReferences()
Drop all references to operands.
Definition: User.h:299
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
FunctionPass * createExpandLargeFpConvertPass()
void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry &)
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191