LLVM 19.0.0git
ExpandLargeFpConvert.cpp
Go to the documentation of this file.
1//===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9
10// This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’,
11// ‘sitofp .. to’ instructions with a bitwidth above a threshold into
12// auto-generated functions. This is useful for targets like x86_64 that cannot
13// lower fp conversions with more than 128 bits.
14//
15//===----------------------------------------------------------------------===//
16
21#include "llvm/CodeGen/Passes.h"
25#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/PassManager.h"
29#include "llvm/Pass.h"
32
33using namespace llvm;
34
36 ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
38 cl::desc("fp convert instructions on integers with "
39 "more than <N> bits are expanded."));
40
41/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
42/// the generated code. This currently generates code similarly to compiler-rt's
43/// implementations.
44///
45/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
46/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
47/// entry:
48/// %0 = bitcast float %a to i32
49/// %conv.i = zext i32 %0 to i64
50/// %tobool.not = icmp sgt i32 %0, -1
51/// %conv = select i1 %tobool.not, i64 1, i64 -1
52/// %and = lshr i64 %conv.i, 23
53/// %shr = and i64 %and, 255
54/// %and2 = and i64 %conv.i, 8388607
55/// %or = or i64 %and2, 8388608
56/// %cmp = icmp ult i64 %shr, 127
57/// br i1 %cmp, label %cleanup, label %if.end
58///
59/// if.end: ; preds = %entry
60/// %sub = add nuw nsw i64 %shr, 4294967169
61/// %conv5 = and i64 %sub, 4294967232
62/// %cmp6.not = icmp eq i64 %conv5, 0
63/// br i1 %cmp6.not, label %if.end12, label %if.then8
64///
65/// if.then8: ; preds = %if.end
66/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
67/// br label %cleanup
68///
69/// if.end12: ; preds = %if.end
70/// %cmp13 = icmp ult i64 %shr, 150
71/// br i1 %cmp13, label %if.then15, label %if.else
72///
73/// if.then15: ; preds = %if.end12
74/// %sub16 = sub nuw nsw i64 150, %shr
75/// %shr17 = lshr i64 %or, %sub16
76/// %mul = mul nsw i64 %shr17, %conv
77/// br label %cleanup
78///
79/// if.else: ; preds = %if.end12
80/// %sub18 = add nsw i64 %shr, -150
81/// %shl = shl i64 %or, %sub18
82/// %mul19 = mul nsw i64 %shl, %conv
83/// br label %cleanup
84///
85/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
86/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
87/// ret i64 %retval.0
88/// }
89///
90/// Replace fp to integer with generated code.
// Expands one scalar fptoui/fptosi instruction in place. The containing block
// is split at the instruction, the conversion is emitted as integer arithmetic
// spread over several new basic blocks, every use of the original result is
// redirected to a phi in the continuation block, and the original instruction
// is erased.
//
// \param FPToI the FPToUI or FPToSI instruction to replace; its result type
//        must be an IntegerType (enforced by the cast<> below).
91static void expandFPToI(Instruction *FPToI) {
92 IRBuilder<> Builder(FPToI);
93 auto *FloatVal = FPToI->getOperand(0);
94 IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
95
96 unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
 // One less than the mantissa width reported for the operand type. It is used
 // below as the bit position of the implicit leading one (see ImplicitBit).
97 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
98
99 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
100 // to i32 first following a sext/zext to target integer type.
 // A half operand is not expanded by hand: it is converted to i32 with the
 // same opcode and the i32 result is widened (zext for FPToUI, sext for
 // FPToSI). No new basic blocks are created on this path.
101 Value *A1 = nullptr;
102 if (FloatVal->getType()->isHalfTy()) {
103 if (FPToI->getOpcode() == Instruction::FPToUI) {
104 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32));
105 A1 = Builder.CreateZExt(A0, IntTy);
106 } else { // FPToSI
107 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32));
108 A1 = Builder.CreateSExt(A0, IntTy);
109 }
110 FPToI->replaceAllUsesWith(A1);
111 FPToI->dropAllReferences();
112 FPToI->eraseFromParent();
113 return;
114 }
115
116 // fp80 conversion is implemented by fpext to fp128 first then do the
117 // conversion.
 // A width of 63 is remapped to 112 so that all layout values below describe
 // the widened format; the actual fpext is emitted in the entry block.
118 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
 // FloatWidth: bit width of the integer the FP value is bitcast to below.
 // ExponentWidth: what remains after the mantissa bits and one sign bit.
 // ExponentBias: 2^(ExponentWidth - 1) - 1.
119 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
120 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
121 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
 // ImplicitBit = 1 << FPMantissaWidth, SignificandMask = ImplicitBit - 1,
 // both at the destination integer width.
122 Value *ImplicitBit = Builder.CreateShl(
123 Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
124 Value *SignificandMask =
125 Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
 // NegOne: i32 -1 sign-extended to IntTy (all bits set).
 // NegInf: 1 << (BitWidth - 1), i.e. only the top bit set.
126 Value *NegOne = Builder.CreateSExt(
127 ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);
128 Value *NegInf =
129 Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),
130 ConstantInt::getSigned(IntTy, BitWidth - 1));
131
 // Build the CFG skeleton. The current block is split at the builder's
 // insertion point into Entry and the "fp-to-i-cleanup" continuation (End);
 // the five new blocks are created in front of End.
132 BasicBlock *Entry = Builder.GetInsertBlock();
133 Function *F = Entry->getParent();
134 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
135 BasicBlock *End =
136 Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
137 BasicBlock *IfEnd =
138 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);
139 BasicBlock *IfThen5 =
140 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);
141 BasicBlock *IfEnd9 =
142 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);
143 BasicBlock *IfThen12 =
144 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);
145 BasicBlock *IfElse =
146 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);
147
 // Drop Entry's current terminator; the conditional branch emitted at the end
 // of the entry section below takes its place.
148 Entry->getTerminator()->eraseFromParent();
149
150 // entry:
151 Builder.SetInsertPoint(Entry);
152 Value *FloatVal0 = FloatVal;
153 // fp80 conversion is implemented by fpext to fp128 first then do the
154 // conversion.
155 if (FloatVal->getType()->isX86_FP80Ty())
156 FloatVal0 =
157 Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
 // ARep0: the raw FP bits as an iFloatWidth integer.
 // ARep: the same bits zero-extended to the destination integer type.
158 Value *ARep0 =
159 Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
160 Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
 // PosOrNeg is true when the raw bits are > -1 as a signed integer, i.e. the
 // sign bit is clear. Sign is the matching +1 / -1 multiplier.
161 Value *PosOrNeg = Builder.CreateICmpSGT(
162 ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
163 Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
164 ConstantInt::getSigned(IntTy, -1));
 // And2: the biased exponent field (bits shifted down, then masked to
 // ExponentWidth bits). Or: the stored mantissa bits with ImplicitBit set.
165 Value *And =
166 Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
167 Value *And2 = Builder.CreateAnd(
168 And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
169 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
170 Value *Or = Builder.CreateOr(Abs, ImplicitBit);
 // Biased exponent below the bias: branch straight to cleanup, where the phi
 // supplies 0 for the edge coming from Entry.
171 Value *Cmp =
172 Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
173 Builder.CreateCondBr(Cmp, End, IfEnd);
174
175 // if.end:
176 Builder.SetInsertPoint(IfEnd);
 // NOTE(review): listing line 178 (the first operand of this CreateAdd and
 // the start of its second operand) is absent from this copy of the file;
 // restore it from the upstream source before building.
 // Cmp3 is an unsigned compare of Add1 against -BitWidth; when true the
 // saturation block (if.then5) is taken, otherwise if.end9.
177 Value *Add1 = Builder.CreateAdd(
179 IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
180 Value *Cmp3 = Builder.CreateICmpULT(
181 Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
182 Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
183
184 // if.then5:
185 Builder.SetInsertPoint(IfThen5);
 // PosInf = all-ones ^ top-bit = every bit set except the top one. The result
 // is PosInf when the sign bit is clear and NegInf otherwise; the opcode
 // (FPToUI vs FPToSI) is not consulted here.
186 Value *PosInf = Builder.CreateXor(NegOne, NegInf);
187 Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
188 Builder.CreateBr(End);
189
190 // if.end9:
191 Builder.SetInsertPoint(IfEnd9);
 // Biased exponent below ExponentBias + FPMantissaWidth: the significand is
 // shifted right (if.then12); otherwise it is shifted left (if.else).
192 Value *Cmp10 = Builder.CreateICmpULT(
193 And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
194 Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
195
196 // if.then12:
197 Builder.SetInsertPoint(IfThen12);
 // result = (Or >> (ExponentBias + FPMantissaWidth - And2)) * Sign
198 Value *Sub13 = Builder.CreateSub(
199 Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
200 Value *Shr14 = Builder.CreateLShr(Or, Sub13);
201 Value *Mul = Builder.CreateMul(Shr14, Sign);
202 Builder.CreateBr(End);
203
204 // if.else:
205 Builder.SetInsertPoint(IfElse);
 // NOTE(review): listing line 207 (the first operand of this CreateAdd and
 // the start of its second operand) is absent from this copy of the file.
 // result = (Or << Sub15) * Sign
206 Value *Sub15 = Builder.CreateAdd(
208 IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
209 Value *Shl = Builder.CreateShl(Or, Sub15);
210 Value *Mul16 = Builder.CreateMul(Shl, Sign);
211 Builder.CreateBr(End);
212
213 // cleanup:
 // Merge the four paths with a phi placed at the very start of End.
214 Builder.SetInsertPoint(End, End->begin());
215 PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
216
217 Retval0->addIncoming(Cond8, IfThen5);
218 Retval0->addIncoming(Mul, IfThen12);
219 Retval0->addIncoming(Mul16, IfElse);
220 Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
221
 // Redirect all users to the phi and delete the original conversion.
222 FPToI->replaceAllUsesWith(Retval0);
223 FPToI->dropAllReferences();
224 FPToI->eraseFromParent();
225}
226
227/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
228/// the generated code. This currently generates code similarly to compiler-rt's
229/// implementations. This implementation has an implicit assumption that integer
230/// width is larger than fp.
231///
232/// An example IR generated from compiler-rt/floatdisf.c looks like below:
233/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
234/// entry:
235/// %cmp = icmp eq i64 %a, 0
236/// br i1 %cmp, label %return, label %if.end
237///
238/// if.end: ; preds = %entry
239/// %shr = ashr i64 %a, 63
240/// %xor = xor i64 %shr, %a
241/// %sub = sub nsw i64 %xor, %shr
242/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
243/// %cast = trunc i64 %0 to i32
244/// %sub1 = sub nuw nsw i32 64, %cast
245/// %sub2 = xor i32 %cast, 63
246/// %cmp3 = icmp ult i32 %cast, 40
247/// br i1 %cmp3, label %if.then4, label %if.else
248///
249/// if.then4: ; preds = %if.end
250/// switch i32 %sub1, label %sw.default [
251/// i32 25, label %sw.bb
252/// i32 26, label %sw.epilog
253/// ]
254///
255/// sw.bb: ; preds = %if.then4
256/// %shl = shl i64 %sub, 1
257/// br label %sw.epilog
258///
259/// sw.default: ; preds = %if.then4
260/// %sub5 = sub nsw i64 38, %0
261/// %sh_prom = and i64 %sub5, 4294967295
262/// %shr6 = lshr i64 %sub, %sh_prom
263/// %shr9 = lshr i64 274877906943, %0
264/// %and = and i64 %shr9, %sub
265/// %cmp10 = icmp ne i64 %and, 0
266/// %conv11 = zext i1 %cmp10 to i64
267/// %or = or i64 %shr6, %conv11
268/// br label %sw.epilog
269///
270/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb
271/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
272/// %1 = lshr i64 %a.addr.0, 2
273/// %2 = and i64 %1, 1
274/// %or16 = or i64 %2, %a.addr.0
275/// %inc = add nsw i64 %or16, 1
276/// %3 = and i64 %inc, 67108864
277/// %tobool.not = icmp eq i64 %3, 0
278/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
279/// %spec.select = ashr i64 %inc, %spec.select.v
280/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
281/// br label %if.end26
282///
283/// if.else: ; preds = %if.end
284/// %sub23 = add nuw nsw i64 %0, 4294967256
285/// %sh_prom24 = and i64 %sub23, 4294967295
286/// %shl25 = shl i64 %sub, %sh_prom24
287/// br label %if.end26
288///
289/// if.end26: ; preds = %sw.epilog, %if.else
290/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
291/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
292/// %conv27 = trunc i64 %shr to i32
293/// %and28 = and i32 %conv27, -2147483648
294/// %add = shl nuw nsw i32 %e.0, 23
295/// %shl29 = add nuw nsw i32 %add, 1065353216
296/// %conv31 = trunc i64 %a.addr.1 to i32
297/// %and32 = and i32 %conv31, 8388607
298/// %or30 = or i32 %and32, %and28
299/// %or33 = or i32 %or30, %shl29
300/// %4 = bitcast i32 %or33 to float
301/// br label %return
302///
303/// return: ; preds = %entry, %if.end26
304/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
305/// ret float %retval.0
306/// }
307///
308/// Replace integer to fp with generated code.
// Expands one scalar uitofp/sitofp instruction in place. The containing block
// is split at the instruction, the FP bit pattern is assembled with integer
// arithmetic spread over several new basic blocks, every use of the original
// result is redirected to a phi in the continuation block, and the original
// instruction is erased.
//
// \param IToFP the UIToFP or SIToFP instruction to replace; its operand type
//        must be an IntegerType wider than the FP storage width (asserted).
309static void expandIToFP(Instruction *IToFP) {
310 IRBuilder<> Builder(IToFP);
311 auto *IntVal = IToFP->getOperand(0);
312 IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
313
314 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
315 unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
316 // fp80 conversion is implemented by conversion to fp128 first following
317 // a fptrunc to fp80.
318 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
319 // FIXME: As there is no related builtins added in compiler-rt,
320 // here currently utilized the fp32 <-> fp16 lib calls to implement.
 // A width of 10 is remapped to 23: the value is built as a float bit pattern
 // and truncated to half at the end of if.end26.
321 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
322 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
323 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
324
325 assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
326 "assumes integer width is larger than fp.");
327
 // Temp1 has only bit (FPMantissaWidth + 3) set; it is tested against the
 // incremented value in sw.epilog to choose between if.end26 and if.then20.
328 Value *Temp1 =
329 Builder.CreateShl(Builder.getIntN(BitWidth, 1),
330 Builder.getIntN(BitWidth, FPMantissaWidth + 3));
331
 // Build the CFG skeleton. The current block is split at the builder's
 // insertion point into Entry and the "itofp-return" continuation (End); the
 // eight new blocks are created in front of End.
332 BasicBlock *Entry = Builder.GetInsertBlock();
333 Function *F = Entry->getParent();
334 Entry->setName(Twine(Entry->getName(), "itofp-entry"));
335 BasicBlock *End =
336 Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
337 BasicBlock *IfEnd =
338 BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
339 BasicBlock *IfThen4 =
340 BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
341 BasicBlock *SwBB =
342 BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
343 BasicBlock *SwDefault =
344 BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
345 BasicBlock *SwEpilog =
346 BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
347 BasicBlock *IfThen20 =
348 BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
349 BasicBlock *IfElse =
350 BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
351 BasicBlock *IfEnd26 =
352 BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
353
 // Drop Entry's current terminator; the conditional branch emitted in the
 // entry section below takes its place.
354 Entry->getTerminator()->eraseFromParent();
355
 // llvm.ctlz overloaded on the source integer type. `True` is passed as the
 // intrinsic's second (i1) argument at the call below.
356 Function *CTLZ =
357 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
358 ConstantInt *True = Builder.getTrue();
359
360 // entry:
361 Builder.SetInsertPoint(Entry);
 // A zero input branches straight to the return block, where the phi supplies
 // a non-negative zero for the edge coming from Entry.
362 Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
363 Builder.CreateCondBr(Cmp, End, IfEnd);
364
365 // if.end:
366 Builder.SetInsertPoint(IfEnd);
 // Shr = IntVal >>arith (BitWidth - 1): every bit is a copy of the sign bit.
 // Sub = (IntVal ^ Shr) - Shr: the magnitude of IntVal.
 // These are emitted for both opcodes, but for UIToFP the code below uses
 // IntVal itself wherever the signed path uses Sub.
367 Value *Shr =
368 Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
369 Value *Xor = Builder.CreateXor(Shr, IntVal);
370 Value *Sub = Builder.CreateSub(Xor, Shr);
371 Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
372 Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
 // Exponent arithmetic is done at i32 (on Cast) unless FloatWidth == 128, in
 // which case it stays at the full source width (on Call).
373 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
 // Sub1 = BitWidth - ctlz      (number of bits up to the highest set bit)
 // Sub2 = BitWidth - 1 - ctlz  (index of the highest set bit)
374 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
375 FloatWidth == 128 ? Call : Cast);
376 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
377 FloatWidth == 128 ? Call : Cast);
 // More than FPMantissaWidth + 1 significant bits: go to if.then4, which
 // narrows and rounds the value. Otherwise go to if.else, which only shifts
 // left.
378 Value *Cmp3 = Builder.CreateICmpSGT(
379 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
380 Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
381
382 // if.then4:
383 Builder.SetInsertPoint(IfThen4);
 // Dispatch on the significant-bit count:
 //   FPMantissaWidth + 2 -> sw.bb (shift left by one),
 //   FPMantissaWidth + 3 -> sw.epilog directly (value used unchanged),
 //   anything else       -> sw.default (shift right, keeping a sticky bit).
384 llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
385 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
386 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
387
388 // sw.bb:
389 Builder.SetInsertPoint(SwBB);
390 Value *Shl =
391 Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
392 Builder.CreateBr(SwEpilog);
393
394 // sw.default:
395 Builder.SetInsertPoint(SwDefault);
 // Sub5 = (BitWidth - FPMantissaWidth - 3) - ctlz is the right-shift amount.
 // Where the arithmetic was done at i32 (FloatWidth != 128), the zero-extended
 // copy is used as the shift operand.
396 Value *Sub5 = Builder.CreateSub(
397 Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
398 FloatWidth == 128 ? Call : Cast);
399 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
400 Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
401 FloatWidth == 128 ? Sub5 : ShProm);
 // Sub8 = ctlz + FPMantissaWidth + 3. Shifting all-ones right by Sub8 leaves
 // a mask over exactly the low bits that Shr6 discarded. If any of those bits
 // is set in the input, a 1 is OR-ed into the shifted value (Conv11).
402 Value *Sub8 =
403 Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
404 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
405 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
406 Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
407 FloatWidth == 128 ? Sub8 : ShProm9);
408 Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
409 Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
410 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
411 Value *Or = Builder.CreateOr(Shr6, Conv11);
412 Builder.CreateBr(SwEpilog);
413
414 // sw.epilog:
415 Builder.SetInsertPoint(SwEpilog);
416 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
417 AAddr0->addIncoming(Or, SwDefault);
418 AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
419 AAddr0->addIncoming(Shl, SwBB);
 // Or17 = a | ((a >> 2) & 1), Inc = Or17 + 1, Shr18 = Inc >> 2.
 // The bit-2 extraction is done on the low 32 bits and zero-extended back.
 // The final shift is arithmetic for SIToFP and logical for UIToFP.
420 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
421 Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2));
422 Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1));
423 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
424 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
425 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
426 Value *Shr18 = nullptr;
427 if (IsSigned)
428 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
429 else
430 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
 // PosOrNeg is true when bit (FPMantissaWidth + 3) of Inc is clear. In that
 // case Shr18 is used as-is; otherwise if.then20 shifts by 3 instead of 2.
431 Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
432 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
 // ExtractT60: low FloatWidth bits of the shifted value.
 // ExtractT64: for FloatWidth > 80 this is Sub2 truncated to i64; otherwise it
 // is bits [32, 64) of the shifted value as an i32.
433 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
434 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
435 Value *ExtractT64 = nullptr;
436 if (FloatWidth > 80)
437 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
438 else
439 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
440 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
441
442 // if.then20
443 Builder.SetInsertPoint(IfThen20);
 // Same extraction as in sw.epilog, but with Inc shifted right by 3 and with
 // Sub1 (rather than Sub2) feeding the FloatWidth > 80 value.
444 Value *Shr21 = nullptr;
445 if (IsSigned)
446 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
447 else
448 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
449 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
450 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
451 Value *ExtractT62 = nullptr;
452 if (FloatWidth > 80)
453 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64));
454 else
455 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32));
456 Builder.CreateBr(IfEnd26);
457
458 // if.else:
459 Builder.SetInsertPoint(IfElse);
 // Sub24 = ctlz - (BitWidth - FPMantissaWidth - 1) is the left-shift amount.
 // The value is shifted left by it, with no rounding on this path.
460 Value *Sub24 = Builder.CreateAdd(
461 FloatWidth == 128 ? Call : Cast,
462 ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
463 -(BitWidth - FPMantissaWidth - 1)));
464 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
465 Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
466 FloatWidth == 128 ? Sub24 : ShProm25);
467 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
468 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
469 Value *ExtractT66 = nullptr;
470 if (FloatWidth > 80)
471 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64));
472 else
473 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
474 Builder.CreateBr(IfEnd26);
475
476 // if.end26:
477 Builder.SetInsertPoint(IfEnd26);
 // Merge the three producers (if.then20, sw.epilog, if.else).
 // AAddr1Off0: low FloatWidth bits of the adjusted value.
478 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
479 AAddr1Off0->addIncoming(ExtractT, IfThen20);
480 AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
481 AAddr1Off0->addIncoming(ExtractT61, IfElse);
 // AAddr1Off32 only exists for FloatWidth > 32. It is i64 when
 // FloatWidth > 80 (where its inputs are the truncated Sub1/Sub2 values) and
 // i32 otherwise (where its inputs are bits [32, 64) of the adjusted value).
482 PHINode *AAddr1Off32 = nullptr;
483 if (FloatWidth > 32) {
484 AAddr1Off32 =
485 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
486 AAddr1Off32->addIncoming(ExtractT62, IfThen20);
487 AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
488 AAddr1Off32->addIncoming(ExtractT66, IfElse);
489 }
 // E0 (only for FloatWidth <= 80) carries Sub1 from if.then20 and Sub2 from
 // the other two predecessors.
490 PHINode *E0 = nullptr;
491 if (FloatWidth <= 80) {
492 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
493 E0->addIncoming(Sub1, IfThen20);
494 E0->addIncoming(Sub2, SwEpilog);
495 E0->addIncoming(Sub2, IfElse);
496 }
 // And29: the sign-mask Shr reduced to a single bit. This is bit 63 at the
 // source width for FloatWidth > 80, and bit 31 of the i32 truncation
 // otherwise.
497 Value *And29 = nullptr;
498 if (FloatWidth > 80) {
499 Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
500 Builder.getIntN(BitWidth, 63));
501 And29 = Builder.CreateAnd(Shr, Temp2, "and29");
502 } else {
503 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32));
504 And29 = Builder.CreateAnd(
505 Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000));
506 }
 // TempMod is the shift that places the exponent inside the upper word:
 // FPMantissaWidth % 32, plus 32 when FloatWidth > 80 (64-bit upper word).
 // Shl30 = (exponent << TempMod) + (((1 << k) - 1) << TempMod), where k is
 // 30 - TempMod (62 - TempMod for the 64-bit case).
 // And34 is the zero-extended Shl30 for FloatWidth > 80; otherwise it is the
 // low TempMod bits of the upper word (or of the only word when
 // FloatWidth == 32).
507 unsigned TempMod = FPMantissaWidth % 32;
508 Value *And34 = nullptr;
509 Value *Shl30 = nullptr;
510 if (FloatWidth > 80) {
511 TempMod += 32;
512 Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod));
513 Shl30 = Builder.CreateAdd(
514 Add,
515 Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
516 And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128));
517 } else {
518 Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod));
519 Shl30 = Builder.CreateAdd(
520 Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod));
521 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
522 Builder.getIntN(32, (1 << TempMod) - 1));
523 }
 // Or35 combines the pieces.
 // FloatWidth > 80: ((sign | exponent-word) << 64) | (AAddr1Off0 masked to
 // the low FPMantissaWidth bits), all at i128.
 // Otherwise: mantissa bits | exponent field, with the sign bit OR-ed in only
 // for SIToFP.
524 Value *Or35 = nullptr;
525 if (FloatWidth > 80) {
526 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128));
527 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
528 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
529 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
530 Builder.getIntN(128, FPMantissaWidth));
531 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
532 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
533 Or35 = Builder.CreateOr(Or34, A6);
534 } else {
535 Value *Or31 = Builder.CreateOr(And34, And29);
536 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
537 }
 // Turn the assembled bits into the destination FP type:
 //   double: Or35 is the upper 32 bits; it is combined with the low 32 bits
 //           of AAddr1Off0 and bitcast.
 //   x86_fp80: bitcast to fp128, then fptrunc.
 //   half: bitcast to float, then fptrunc.
 //   otherwise: a plain bitcast.
538 Value *A4 = nullptr;
539 if (IToFP->getType()->isDoubleTy()) {
540 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
541 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
542 Value *And1 =
543 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
544 Value *Or1 = Builder.CreateOr(Shl1, And1);
545 A4 = Builder.CreateBitCast(Or1, IToFP->getType());
546 } else if (IToFP->getType()->isX86_FP80Ty()) {
547 Value *A40 =
548 Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
549 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
550 } else if (IToFP->getType()->isHalfTy()) {
551 // Deal with "half" situation. This is a workaround since we don't have
552 // floattihf.c currently as referring.
553 Value *A40 =
554 Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
555 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
556 } else // float type
557 A4 = Builder.CreateBitCast(Or35, IToFP->getType());
558 Builder.CreateBr(End);
559
560 // return:
 // Merge the computed value with the zero supplied for the Entry edge, using
 // a phi placed at the very start of End.
561 Builder.SetInsertPoint(End, End->begin());
562 PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
563 Retval0->addIncoming(A4, IfEnd26);
564 Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
565
 // Redirect all users to the phi and delete the original conversion.
566 IToFP->replaceAllUsesWith(Retval0);
567 IToFP->dropAllReferences();
568 IToFP->eraseFromParent();
569}
570
// Scans \p F for scalar fptoui/fptosi/uitofp/sitofp instructions whose integer
// type is wider than the legal threshold, then expands each one with
// expandFPToI() or expandIToFP().
//
// \param F   the function to transform.
// \param TLI target lowering information (its use is on listing lines that
//            are absent from this copy; see the note below).
// \returns true if at least one instruction was queued and expanded, false
//          otherwise.
571static bool runImpl(Function &F, const TargetLowering &TLI) {
 // NOTE(review): listing line 572 is absent from this copy; `Replace`, the
 // worklist used below, is presumably declared there. Restore from upstream.
573 bool Modified = false;
574
 // NOTE(review): listing lines 576-577 are absent from this copy, so the
 // initializer of MaxLegalFpConvertBitWidth and the condition guarding the
 // ExpandFpConvertBits override below are not visible here.
575 unsigned MaxLegalFpConvertBitWidth =
578 MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
579
 // A threshold at or above the maximum integer width means no instruction can
 // exceed it, so there is nothing to expand.
580 if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
581 return false;
582
 // First pass: only collect candidates. Expansion is deferred until the scan
 // is complete, since both expand helpers split blocks and erase the
 // instruction they replace.
583 for (auto &I : instructions(F)) {
584 switch (I.getOpcode()) {
585 case Instruction::FPToUI:
586 case Instruction::FPToSI: {
587 // TODO: This pass doesn't handle vectors.
588 if (I.getOperand(0)->getType()->isVectorTy())
589 continue;
590
 // For fp -> int the integer width of interest is the result type.
 // NOTE(review): the dyn_cast result is dereferenced without a null check;
 // confirm a non-integer scalar result cannot reach here, or use cast<>.
591 auto *IntTy = dyn_cast<IntegerType>(I.getType());
592 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
593 continue;
594
595 Replace.push_back(&I);
596 Modified = true;
597 break;
598 }
599 case Instruction::UIToFP:
600 case Instruction::SIToFP: {
601 // TODO: This pass doesn't handle vectors.
602 if (I.getOperand(0)->getType()->isVectorTy())
603 continue;
604
 // For int -> fp the integer width of interest is the operand type.
 // NOTE(review): same unchecked dyn_cast as in the fp -> int case above.
605 auto *IntTy = dyn_cast<IntegerType>(I.getOperand(0)->getType());
606 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
607 continue;
608
609 Replace.push_back(&I);
610 Modified = true;
611 break;
612 }
613 default:
614 break;
615 }
616 }
617
618 if (Replace.empty())
619 return false;
620
 // Second pass: expand the queued instructions, taking them from the back of
 // the worklist.
621 while (!Replace.empty()) {
622 Instruction *I = Replace.pop_back_val();
623 if (I->getOpcode() == Instruction::FPToUI ||
624 I->getOpcode() == Instruction::FPToSI) {
625 expandFPToI(I);
626 } else {
627 expandIToFP(I);
628 }
629 }
630
631 return Modified;
632}
633
634namespace {
// FunctionPass wrapper that runs runImpl() on each function, using the
// TargetLowering of the function's subtarget.
635class ExpandLargeFpConvertLegacyPass : public FunctionPass {
636public:
637 static char ID;
638
 // NOTE(review): listing lines 640-641 (the constructor body) are absent from
 // this copy of the file.
639 ExpandLargeFpConvertLegacyPass() : FunctionPass(ID) {
642 }
643
 // Fetches the TargetMachine through the TargetPassConfig analysis, looks up
 // the TargetLowering for F's subtarget, and forwards to runImpl(). Returns
 // runImpl()'s result.
644 bool runOnFunction(Function &F) override {
645 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
646 auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
647 return runImpl(F, *TLI);
648 }
649
 // NOTE(review): listing lines 651-653 (the body) are absent from this copy.
 // runOnFunction() calls getAnalysis<TargetPassConfig>(), so that analysis is
 // presumably declared as required here; confirm against upstream.
650 void getAnalysisUsage(AnalysisUsage &AU) const override {
654 }
655};
656} // namespace
657
660 const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
663}
664
// Definition of the legacy pass's static ID member, followed by the
// INITIALIZE_PASS_BEGIN/END macro pair for the pass. The macros are given the
// argument string "expand-large-fp-convert" and the name
// "Expand large fp convert"; both trailing flags are false.
665char ExpandLargeFpConvertLegacyPass::ID = 0;
666INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
667 "Expand large fp convert", false, false)
668INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
669 "Expand large fp convert", false, false)
670
672 return new ExpandLargeFpConvertLegacyPass();
673}
Expand Atomic instructions
bool End
Definition: ELF_riscv.cpp:480
static bool runImpl(Function &F, const TargetLowering &TLI)
static void expandIToFP(Instruction *IToFP)
Generate code to convert an integer to a fp number, replacing S(U)IToFP with the generated code.
static void expandFPToI(Instruction *FPToI)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
expand large fp convert
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(llvm::IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
static bool runImpl(Function &F, const TargetLowering &TLI)
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:21
This is the interface for a simple mod/ref and alias analysis over globals.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
BinaryOperator * Mul
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:198
static Constant * getZero(Type *Ty, bool Negative=false)
Definition: Constants.cpp:1037
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:123
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Legacy wrapper pass to provide the GlobalsAAResult object.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2235
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2001
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:533
Value * CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2243
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2079
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:460
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
Value * CreateFPToUI(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2051
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2017
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1431
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:520
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:525
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2223
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2375
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1137
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2219
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1338
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2105
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:491
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1114
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1410
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2005
LLVMContext & getContext() const
Definition: IRBuilder.h:176
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1469
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1321
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1491
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1108
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2390
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1450
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2088
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1513
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1355
Value * CreateFPToSI(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2058
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2644
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
Class to represent integer types.
Definition: DerivedTypes.h:40
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Definition: DerivedTypes.h:52
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:98
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Multiway switch.
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the largest fp convert the backend supports.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition: Type.h:160
static Type * getFP128Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
static Type * getFloatTy(LLVMContext &C)
void dropAllReferences()
Drop all references to operands.
Definition: User.h:299
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1459
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:361
FunctionPass * createExpandLargeFpConvertPass()
void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry &)
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191