//===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// AMDGPU target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSubtarget.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>
24
25using namespace llvm;
26using namespace llvm::PatternMatch;
27
28#define DEBUG_TYPE "AMDGPUtti"
29
30namespace {
31
32struct AMDGPUImageDMaskIntrinsic {
33 unsigned Intr;
34};
35
36#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
37#include "InstCombineTables.inc"
38
39} // end anonymous namespace
40
41// Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
42//
43// A single NaN input is folded to minnum, so we rely on that folding for
44// handling NaNs.
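// Illustrative example (not part of the original source): for
// fmed3(1.0, 5.0, 3.0) the maximum of the three inputs is 5.0 == Src1, so the
// result is maxnum(Src0, Src2) = maxnum(1.0, 3.0) = 3.0, i.e. the median.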
45static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
46 const APFloat &Src2) {
47 APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
48
49 APFloat::cmpResult Cmp0 = Max3.compare(Src0);
50 assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
51 if (Cmp0 == APFloat::cmpEqual)
52 return maxnum(Src1, Src2);
53
54 APFloat::cmpResult Cmp1 = Max3.compare(Src1);
55 assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
56 if (Cmp1 == APFloat::cmpEqual)
57 return maxnum(Src0, Src2);
58
59 return maxnum(Src0, Src1);
60}
61
62// Check if a value can be converted to a 16-bit value without losing
63// precision.
64// The value is expected to be either a float (IsFloat = true) or an unsigned
65// integer (IsFloat = false).
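// Illustrative examples (assumptions, not from the original source): a float
// constant 2.5 converts to half exactly and is safe; 1.0e10 would lose
// precision and is rejected; an i32 value produced by 'zext i16 %x to i32'
// is safe because its high bits are known to be zero.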
66static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
67 Type *VTy = V.getType();
68 if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
69 // The value is already 16-bit, so we don't want to convert to 16-bit again!
70 return false;
71 }
72 if (IsFloat) {
73 if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
74 // We need to check that if we cast the index down to a half, we do not
75 // lose precision.
76 APFloat FloatValue(ConstFloat->getValueAPF());
77 bool LosesInfo = true;
78 FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero,
79 &LosesInfo);
80 return !LosesInfo;
81 }
82 } else {
83 if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
84 // We need to check that if we cast the index down to an i16, we do not
85 // lose precision.
86 APInt IntValue(ConstInt->getValue());
87 return IntValue.getActiveBits() <= 16;
88 }
89 }
90
91 Value *CastSrc;
92 bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc)))
93 : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)));
94 if (IsExt) {
95 Type *CastSrcTy = CastSrc->getType();
96 if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
97 return true;
98 }
99
100 return false;
101}
102
// Convert a value to 16-bit.
static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
  Type *VTy = V.getType();
  if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
    return cast<Instruction>(&V)->getOperand(0);
  if (VTy->isIntegerTy())
    return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
  if (VTy->isFloatingPointTy())
    return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));

  llvm_unreachable("Should never be called!");
}
115
116/// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with
117/// modified arguments (based on OldIntr) and replaces InstToReplace with
118/// this newly created intrinsic call.
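/// For example, the _L -> _LZ fold below calls this helper with a lambda that
/// simply erases the LOD operand from Args before the new intrinsic is built.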
static std::optional<Instruction *> modifyIntrinsicCall(
    IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
    InstCombiner &IC,
    std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
        Func) {
  SmallVector<Type *, 4> ArgTys;
  if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
    return std::nullopt;

  SmallVector<Value *, 8> Args(OldIntr.args());
129
130 // Modify arguments and types
131 Func(Args, ArgTys);
132
133 Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys);
134
135 CallInst *NewCall = IC.Builder.CreateCall(I, Args);
136 NewCall->takeName(&OldIntr);
137 NewCall->copyMetadata(OldIntr);
138 if (isa<FPMathOperator>(NewCall))
139 NewCall->copyFastMathFlags(&OldIntr);
140
141 // Erase and replace uses
142 if (!InstToReplace.getType()->isVoidTy())
143 IC.replaceInstUsesWith(InstToReplace, NewCall);
144
145 bool RemoveOldIntr = &OldIntr != &InstToReplace;
146
147 auto RetValue = IC.eraseInstFromFunction(InstToReplace);
148 if (RemoveOldIntr)
149 IC.eraseInstFromFunction(OldIntr);
150
151 return RetValue;
152}
153
static std::optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                             const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
                             IntrinsicInst &II, InstCombiner &IC) {
  // Optimize _L to _LZ when _L is zero
  if (const auto *LZMappingInfo =
          AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantLod =
            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
      if (ConstantLod->isZero() || ConstantLod->isNegative()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->LodIndex);
            });
      }
    }
  }
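  // Illustrative IR for the fold above (a sketch; exact mangled intrinsic
  // names are assumptions):
  //   %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(..., float 0.0, ...)
  // becomes
  //   %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(...)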
174
  // Optimize _mip away when 'lod' is zero
  if (const auto *MIPMappingInfo =
          AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantMip =
            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
      if (ConstantMip->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->MipIndex);
            });
      }
    }
  }
191
  // Optimize _bias away when 'bias' is zero
  if (const auto *BiasMappingInfo =
          AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantBias =
            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
      if (ConstantBias->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
              ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
            });
      }
    }
  }
209
  // Optimize _offset away when 'offset' is zero
  if (const auto *OffsetMappingInfo =
          AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantOffset =
            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
      if (ConstantOffset->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(
                OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
            });
      }
    }
  }
226
  // Try to use D16
  if (ST->hasD16Images()) {

    const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
        AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);

    if (BaseOpcode->HasD16) {

      // If the only use of image intrinsic is a fptrunc (with conversion to
      // half) then both fptrunc and image intrinsic will be replaced with image
      // intrinsic with D16 flag.
      if (II.hasOneUse()) {
        Instruction *User = II.user_back();

        if (User->getOpcode() == Instruction::FPTrunc &&
            User->getType()->getScalarType()->isHalfTy()) {

          return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
                                     [&](auto &Args, auto &ArgTys) {
                                       // Change return type of image intrinsic.
                                       // Set it to return type of fptrunc.
                                       ArgTys[0] = User->getType();
                                     });
        }
      }
    }
  }
254
255 // Try to use A16 or G16
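  // (As an aside, not stated in the original source: A16 refers to hardware
  // support for 16-bit addresses/coordinates, G16 to 16-bit gradients; see the
  // subtarget feature checks below.)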
256 if (!ST->hasA16() && !ST->hasG16())
257 return std::nullopt;
258
  // Address is interpreted as float if the instruction has a sampler or as
  // unsigned int if there is no sampler.
  bool HasSampler =
      AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
  bool FloatCoord = false;
264 // true means derivatives can be converted to 16 bit, coordinates not
265 bool OnlyDerivatives = false;
266
267 for (unsigned OperandIndex = ImageDimIntr->GradientStart;
268 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
269 Value *Coord = II.getOperand(OperandIndex);
270 // If the values are not derived from 16-bit values, we cannot optimize.
271 if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
272 if (OperandIndex < ImageDimIntr->CoordStart ||
273 ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
274 return std::nullopt;
275 }
276 // All gradients can be converted, so convert only them
277 OnlyDerivatives = true;
278 break;
279 }
280
281 assert(OperandIndex == ImageDimIntr->GradientStart ||
282 FloatCoord == Coord->getType()->isFloatingPointTy());
283 FloatCoord = Coord->getType()->isFloatingPointTy();
284 }
285
286 if (!OnlyDerivatives && !ST->hasA16())
287 OnlyDerivatives = true; // Only supports G16
288
289 // Check if there is a bias parameter and if it can be converted to f16
290 if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
291 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
292 assert(HasSampler &&
293 "Only image instructions with a sampler can have a bias");
294 if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
295 OnlyDerivatives = true;
296 }
297
298 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
299 ImageDimIntr->CoordStart))
300 return std::nullopt;
301
  Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
                               : Type::getInt16Ty(II.getContext());

305 return modifyIntrinsicCall(
306 II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
307 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
308 if (!OnlyDerivatives) {
309 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
310
311 // Change the bias type
312 if (ImageDimIntr->NumBiasArgs != 0)
313 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
314 }
315
316 unsigned EndIndex =
317 OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
318 for (unsigned OperandIndex = ImageDimIntr->GradientStart;
319 OperandIndex < EndIndex; OperandIndex++) {
320 Args[OperandIndex] =
321 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
322 }
323
324 // Convert the bias
325 if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
326 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
327 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
328 }
329 });
}

bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Instruction &I,
                                           const Value *Op0, const Value *Op1,
                                           InstCombiner &IC) const {
  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
  // infinity, gives +0.0. If we can prove we don't have one of the special
  // cases then we can use a normal multiply instead.
  // TODO: Create and use isKnownFiniteNonZero instead of just matching
  // constants here.
  if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
      match(Op1, PatternMatch::m_FiniteNonZero())) {
    // One operand is not zero or infinity or NaN.
    return true;
  }
345
346 auto *TLI = &IC.getTargetLibraryInfo();
347 if (isKnownNeverInfOrNaN(Op0, IC.getDataLayout(), TLI, 0,
348 &IC.getAssumptionCache(), &I,
349 &IC.getDominatorTree()) &&
350 isKnownNeverInfOrNaN(Op1, IC.getDataLayout(), TLI, 0,
351 &IC.getAssumptionCache(), &I,
352 &IC.getDominatorTree())) {
353 // Neither operand is infinity or NaN.
354 return true;
355 }
356 return false;
357}
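// For reference (illustrative, not from the original source): under the legacy
// semantics described above, fmul_legacy(+0.0, NaN) yields +0.0, whereas a
// plain fmul would yield NaN. The rewrite to a normal multiply is therefore
// only done when one operand is a known finite non-zero constant or neither
// operand can be NaN or infinity.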
358
359/// Match an fpext from half to float, or a constant we can convert.
360static bool matchFPExtFromF16(Value *Arg, Value *&FPExtSrc) {
361 if (match(Arg, m_OneUse(m_FPExt(m_Value(FPExtSrc)))))
362 return FPExtSrc->getType()->isHalfTy();
363
364 ConstantFP *CFP;
  if (match(Arg, m_ConstantFP(CFP))) {
    bool LosesInfo;
    APFloat Val(CFP->getValueAPF());
    Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    if (LosesInfo)
      return false;
371
372 FPExtSrc = ConstantFP::get(Type::getHalfTy(Arg->getContext()), Val);
373 return true;
374 }
375
376 return false;
377}
378
// Trim all zero components from the end of the vector \p UseV and return
// an appropriate bitset with known elements.
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
                                       Instruction *I) {
  auto *VTy = cast<FixedVectorType>(UseV->getType());
384 unsigned VWidth = VTy->getNumElements();
385 APInt DemandedElts = APInt::getAllOnes(VWidth);
386
387 for (int i = VWidth - 1; i > 0; --i) {
388 auto *Elt = findScalarElement(UseV, i);
389 if (!Elt)
390 break;
391
392 if (auto *ConstElt = dyn_cast<Constant>(Elt)) {
393 if (!ConstElt->isNullValue() && !isa<UndefValue>(Elt))
394 break;
395 } else {
396 break;
397 }
398
399 DemandedElts.clearBit(i);
400 }
401
402 return DemandedElts;
403}

static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
                                                     IntrinsicInst &II,
                                                     APInt DemandedElts,
                                                     int DMaskIdx = -1,
                                                     bool IsLoad = true);
410
411/// Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
412static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp) {
413 return (SqrtOp->getType()->isFloatTy() &&
414 (SqrtOp->hasApproxFunc() || SqrtOp->getFPAccuracy() >= 1.0f)) ||
415 SqrtOp->getType()->isHalfTy();
416}
417
std::optional<Instruction *>
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
421 switch (IID) {
422 case Intrinsic::amdgcn_rcp: {
423 Value *Src = II.getArgOperand(0);
424
425 // TODO: Move to ConstantFolding/InstSimplify?
426 if (isa<UndefValue>(Src)) {
427 Type *Ty = II.getType();
428 auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
429 return IC.replaceInstUsesWith(II, QNaN);
430 }
431
432 if (II.isStrictFP())
433 break;
434
435 if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
436 const APFloat &ArgVal = C->getValueAPF();
      APFloat Val(ArgVal.getSemantics(), 1);
      Val.divide(ArgVal, APFloat::rmNearestTiesToEven);

440 // This is more precise than the instruction may give.
441 //
442 // TODO: The instruction always flushes denormal results (except for f16),
443 // should this also?
444 return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
445 }
446
447 FastMathFlags FMF = cast<FPMathOperator>(II).getFastMathFlags();
448 if (!FMF.allowContract())
449 break;
450 auto *SrcCI = dyn_cast<IntrinsicInst>(Src);
451 if (!SrcCI)
452 break;
453
454 auto IID = SrcCI->getIntrinsicID();
455 // llvm.amdgcn.rcp(llvm.amdgcn.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable
456 //
457 // llvm.amdgcn.rcp(llvm.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable and
458 // relaxed.
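    // Illustrative IR for this fold (a sketch, assuming contract fast-math
    // flags on both calls):
    //   %s = call contract float @llvm.amdgcn.sqrt.f32(float %x)
    //   %r = call contract float @llvm.amdgcn.rcp.f32(float %s)
    // folds to
    //   %r = call contract float @llvm.amdgcn.rsq.f32(float %x)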
459 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
460 const FPMathOperator *SqrtOp = cast<FPMathOperator>(SrcCI);
461 FastMathFlags InnerFMF = SqrtOp->getFastMathFlags();
462 if (!InnerFMF.allowContract() || !SrcCI->hasOneUse())
463 break;
464
465 if (IID == Intrinsic::sqrt && !canContractSqrtToRsq(SqrtOp))
466 break;

      Function *NewDecl = Intrinsic::getDeclaration(
          SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});

471 InnerFMF |= FMF;
472 II.setFastMathFlags(InnerFMF);
473
474 II.setCalledFunction(NewDecl);
475 return IC.replaceOperand(II, 0, SrcCI->getArgOperand(0));
476 }
477
478 break;
479 }
480 case Intrinsic::amdgcn_sqrt:
481 case Intrinsic::amdgcn_rsq: {
482 Value *Src = II.getArgOperand(0);
483
484 // TODO: Move to ConstantFolding/InstSimplify?
485 if (isa<UndefValue>(Src)) {
486 Type *Ty = II.getType();
487 auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
488 return IC.replaceInstUsesWith(II, QNaN);
489 }
490
491 // f16 amdgcn.sqrt is identical to regular sqrt.
    if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
      Function *NewDecl = Intrinsic::getDeclaration(
          II.getModule(), Intrinsic::sqrt, {II.getType()});
      II.setCalledFunction(NewDecl);
496 return &II;
497 }
498
499 break;
500 }
501 case Intrinsic::amdgcn_log:
502 case Intrinsic::amdgcn_exp2: {
503 const bool IsLog = IID == Intrinsic::amdgcn_log;
504 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
505 Value *Src = II.getArgOperand(0);
506 Type *Ty = II.getType();
507
508 if (isa<PoisonValue>(Src))
509 return IC.replaceInstUsesWith(II, Src);
510
511 if (IC.getSimplifyQuery().isUndefValue(Src))
512 return IC.replaceInstUsesWith(II, ConstantFP::getNaN(Ty));
513
514 if (ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
515 if (C->isInfinity()) {
516 // exp2(+inf) -> +inf
517 // log2(+inf) -> +inf
518 if (!C->isNegative())
519 return IC.replaceInstUsesWith(II, C);
520
521 // exp2(-inf) -> 0
522 if (IsExp && C->isNegative())
523 return IC.replaceInstUsesWith(II, ConstantFP::getZero(Ty));
524 }
525
526 if (II.isStrictFP())
527 break;
528
529 if (C->isNaN()) {
530 Constant *Quieted = ConstantFP::get(Ty, C->getValue().makeQuiet());
531 return IC.replaceInstUsesWith(II, Quieted);
532 }
533
534 // f32 instruction doesn't handle denormals, f16 does.
535 if (C->isZero() || (C->getValue().isDenormal() && Ty->isFloatTy())) {
536 Constant *FoldedValue = IsLog ? ConstantFP::getInfinity(Ty, true)
537 : ConstantFP::get(Ty, 1.0);
538 return IC.replaceInstUsesWith(II, FoldedValue);
539 }
540
541 if (IsLog && C->isNegative())
542 return IC.replaceInstUsesWith(II, ConstantFP::getNaN(Ty));
543
544 // TODO: Full constant folding matching hardware behavior.
545 }
546
547 break;
548 }
549 case Intrinsic::amdgcn_frexp_mant:
550 case Intrinsic::amdgcn_frexp_exp: {
551 Value *Src = II.getArgOperand(0);
552 if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
553 int Exp;
554 APFloat Significand =
555 frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);
556
557 if (IID == Intrinsic::amdgcn_frexp_mant) {
558 return IC.replaceInstUsesWith(
559 II, ConstantFP::get(II.getContext(), Significand));
560 }
561
562 // Match instruction special case behavior.
563 if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
564 Exp = 0;
565
566 return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
567 }
568
569 if (isa<UndefValue>(Src)) {
570 return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
571 }
572
573 break;
574 }
575 case Intrinsic::amdgcn_class: {
576 Value *Src0 = II.getArgOperand(0);
577 Value *Src1 = II.getArgOperand(1);
578 const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
    if (CMask) {
      II.setCalledOperand(Intrinsic::getDeclaration(
          II.getModule(), Intrinsic::is_fpclass, Src0->getType()));

      // Clamp any excess bits, as they're illegal for the generic intrinsic.
      II.setArgOperand(1, ConstantInt::get(Src1->getType(),
                                           CMask->getZExtValue() & fcAllFlags));
586 return &II;
587 }
588
589 // Propagate poison.
590 if (isa<PoisonValue>(Src0) || isa<PoisonValue>(Src1))
591 return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
592
593 // llvm.amdgcn.class(_, undef) -> false
594 if (IC.getSimplifyQuery().isUndefValue(Src1))
595 return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
596
597 // llvm.amdgcn.class(undef, mask) -> mask != 0
598 if (IC.getSimplifyQuery().isUndefValue(Src0)) {
599 Value *CmpMask = IC.Builder.CreateICmpNE(
600 Src1, ConstantInt::getNullValue(Src1->getType()));
601 return IC.replaceInstUsesWith(II, CmpMask);
602 }
603 break;
604 }
605 case Intrinsic::amdgcn_cvt_pkrtz: {
606 Value *Src0 = II.getArgOperand(0);
607 Value *Src1 = II.getArgOperand(1);
608 if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
609 if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
        const fltSemantics &HalfSem =
            II.getType()->getScalarType()->getFltSemantics();
        bool LosesInfo;
613 APFloat Val0 = C0->getValueAPF();
614 APFloat Val1 = C1->getValueAPF();
615 Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
616 Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
617
        Constant *Folded =
            ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
                                 ConstantFP::get(II.getContext(), Val1)});
621 return IC.replaceInstUsesWith(II, Folded);
622 }
623 }
624
625 if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
626 return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
627 }
628
629 break;
630 }
631 case Intrinsic::amdgcn_cvt_pknorm_i16:
632 case Intrinsic::amdgcn_cvt_pknorm_u16:
633 case Intrinsic::amdgcn_cvt_pk_i16:
634 case Intrinsic::amdgcn_cvt_pk_u16: {
635 Value *Src0 = II.getArgOperand(0);
636 Value *Src1 = II.getArgOperand(1);
637
638 if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
639 return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
640 }
641
642 break;
643 }
644 case Intrinsic::amdgcn_ubfe:
645 case Intrinsic::amdgcn_sbfe: {
646 // Decompose simple cases into standard shifts.
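    // Illustrative example (not from the original source): for i32,
    // ubfe(%x, 8, 8) extracts bits [15:8] and decomposes into
    //   %t = shl i32 %x, 16
    //   %r = lshr i32 %t, 24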
647 Value *Src = II.getArgOperand(0);
648 if (isa<UndefValue>(Src)) {
649 return IC.replaceInstUsesWith(II, Src);
650 }
651
652 unsigned Width;
653 Type *Ty = II.getType();
654 unsigned IntSize = Ty->getIntegerBitWidth();
655
656 ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
    if (CWidth) {
      Width = CWidth->getZExtValue();
      if ((Width & (IntSize - 1)) == 0) {
        return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
      }
662
663 // Hardware ignores high bits, so remove those.
664 if (Width >= IntSize) {
665 return IC.replaceOperand(
666 II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
667 }
668 }
669
670 unsigned Offset;
671 ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
672 if (COffset) {
673 Offset = COffset->getZExtValue();
674 if (Offset >= IntSize) {
675 return IC.replaceOperand(
676 II, 1,
677 ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
678 }
679 }
680
681 bool Signed = IID == Intrinsic::amdgcn_sbfe;
682
683 if (!CWidth || !COffset)
684 break;
685
686 // The case of Width == 0 is handled above, which makes this transformation
687 // safe. If Width == 0, then the ashr and lshr instructions become poison
688 // value since the shift amount would be equal to the bit size.
689 assert(Width != 0);
690
691 // TODO: This allows folding to undef when the hardware has specific
692 // behavior?
693 if (Offset + Width < IntSize) {
694 Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
695 Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
696 : IC.Builder.CreateLShr(Shl, IntSize - Width);
697 RightShift->takeName(&II);
698 return IC.replaceInstUsesWith(II, RightShift);
699 }
700
701 Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
702 : IC.Builder.CreateLShr(Src, Offset);
703
704 RightShift->takeName(&II);
705 return IC.replaceInstUsesWith(II, RightShift);
706 }
707 case Intrinsic::amdgcn_exp:
708 case Intrinsic::amdgcn_exp_row:
709 case Intrinsic::amdgcn_exp_compr: {
710 ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
711 unsigned EnBits = En->getZExtValue();
712 if (EnBits == 0xf)
713 break; // All inputs enabled.
714
715 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
716 bool Changed = false;
717 for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
718 if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
719 (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
720 Value *Src = II.getArgOperand(I + 2);
721 if (!isa<UndefValue>(Src)) {
722 IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
723 Changed = true;
724 }
725 }
726 }
727
728 if (Changed) {
729 return &II;
730 }
731
732 break;
733 }
734 case Intrinsic::amdgcn_fmed3: {
735 // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
736 // for the shader.
737
738 Value *Src0 = II.getArgOperand(0);
739 Value *Src1 = II.getArgOperand(1);
740 Value *Src2 = II.getArgOperand(2);
741
742 // Checking for NaN before canonicalization provides better fidelity when
743 // mapping other operations onto fmed3 since the order of operands is
744 // unchanged.
745 CallInst *NewCall = nullptr;
746 if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
747 NewCall = IC.Builder.CreateMinNum(Src1, Src2);
748 } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
749 NewCall = IC.Builder.CreateMinNum(Src0, Src2);
750 } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
751 NewCall = IC.Builder.CreateMaxNum(Src0, Src1);
752 }
753
754 if (NewCall) {
755 NewCall->copyFastMathFlags(&II);
756 NewCall->takeName(&II);
757 return IC.replaceInstUsesWith(II, NewCall);
758 }
759
760 bool Swap = false;
761 // Canonicalize constants to RHS operands.
762 //
763 // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
764 if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
765 std::swap(Src0, Src1);
766 Swap = true;
767 }
768
769 if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
770 std::swap(Src1, Src2);
771 Swap = true;
772 }
773
774 if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
775 std::swap(Src0, Src1);
776 Swap = true;
777 }
778
779 if (Swap) {
780 II.setArgOperand(0, Src0);
781 II.setArgOperand(1, Src1);
782 II.setArgOperand(2, Src2);
783 return &II;
784 }
785
786 if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
787 if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
788 if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
789 APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
790 C2->getValueAPF());
791 return IC.replaceInstUsesWith(
792 II, ConstantFP::get(IC.Builder.getContext(), Result));
793 }
794 }
795 }
796
797 if (!ST->hasMed3_16())
798 break;
799
800 Value *X, *Y, *Z;
801
802 // Repeat floating-point width reduction done for minnum/maxnum.
803 // fmed3((fpext X), (fpext Y), (fpext Z)) -> fpext (fmed3(X, Y, Z))
804 if (matchFPExtFromF16(Src0, X) && matchFPExtFromF16(Src1, Y) &&
805 matchFPExtFromF16(Src2, Z)) {
806 Value *NewCall = IC.Builder.CreateIntrinsic(IID, {X->getType()},
807 {X, Y, Z}, &II, II.getName());
808 return new FPExtInst(NewCall, II.getType());
809 }
810
811 break;
812 }
813 case Intrinsic::amdgcn_icmp:
814 case Intrinsic::amdgcn_fcmp: {
    const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
    // Guard against invalid arguments.
    int64_t CCVal = CC->getZExtValue();
    bool IsInteger = IID == Intrinsic::amdgcn_icmp;
    if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
                       CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
        (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
                        CCVal > CmpInst::LAST_FCMP_PREDICATE)))
      break;

825 Value *Src0 = II.getArgOperand(0);
826 Value *Src1 = II.getArgOperand(1);
827
828 if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
829 if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
830 Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
831 if (CCmp->isNullValue()) {
832 return IC.replaceInstUsesWith(
833 II, ConstantExpr::getSExt(CCmp, II.getType()));
834 }
835
836 // The result of V_ICMP/V_FCMP assembly instructions (which this
837 // intrinsic exposes) is one bit per thread, masked with the EXEC
838 // register (which contains the bitmask of live threads). So a
839 // comparison that always returns true is the same as a read of the
        // EXEC register.
        Function *NewF = Intrinsic::getDeclaration(
            II.getModule(), Intrinsic::read_register, II.getType());
843 Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
844 MDNode *MD = MDNode::get(II.getContext(), MDArgs);
845 Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
846 CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
847 NewCall->addFnAttr(Attribute::Convergent);
848 NewCall->takeName(&II);
849 return IC.replaceInstUsesWith(II, NewCall);
850 }
851
      // Canonicalize constants to RHS.
      CmpInst::Predicate SwapPred =
          CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
      II.setArgOperand(0, Src1);
      II.setArgOperand(1, Src0);
857 II.setArgOperand(
858 2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
859 return &II;
860 }
861
862 if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
863 break;
864
865 // Canonicalize compare eq with true value to compare != 0
866 // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
867 // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
868 // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
869 // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
870 Value *ExtSrc;
871 if (CCVal == CmpInst::ICMP_EQ &&
872 ((match(Src1, PatternMatch::m_One()) &&
873 match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
874 (match(Src1, PatternMatch::m_AllOnes()) &&
875 match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
        ExtSrc->getType()->isIntegerTy(1)) {
      IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType()));
      IC.replaceOperand(II, 2,
                        ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
      return &II;
    }
882
883 CmpInst::Predicate SrcPred;
884 Value *SrcLHS;
885 Value *SrcRHS;
886
887 // Fold compare eq/ne with 0 from a compare result as the predicate to the
888 // intrinsic. The typical use is a wave vote function in the library, which
889 // will be fed from a user code condition compared with 0. Fold in the
890 // redundant compare.
891
892 // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
893 // -> llvm.amdgcn.[if]cmp(a, b, pred)
894 //
895 // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
896 // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
    if (match(Src1, PatternMatch::m_Zero()) &&
        match(Src0, m_ZExtOrSExt(
                        m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
                              PatternMatch::m_Value(SrcRHS))))) {
901 if (CCVal == CmpInst::ICMP_EQ)
902 SrcPred = CmpInst::getInversePredicate(SrcPred);
903
904 Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
905 ? Intrinsic::amdgcn_fcmp
906 : Intrinsic::amdgcn_icmp;
907
908 Type *Ty = SrcLHS->getType();
909 if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
910 // Promote to next legal integer type.
911 unsigned Width = CmpType->getBitWidth();
912 unsigned NewWidth = Width;
913
914 // Don't do anything for i1 comparisons.
915 if (Width == 1)
916 break;
917
918 if (Width <= 16)
919 NewWidth = 16;
920 else if (Width <= 32)
921 NewWidth = 32;
922 else if (Width <= 64)
923 NewWidth = 64;
924 else if (Width > 64)
925 break; // Can't handle this.
926
927 if (Width != NewWidth) {
928 IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
929 if (CmpInst::isSigned(SrcPred)) {
930 SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
931 SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
932 } else {
933 SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
934 SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
935 }
936 }
937 } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
        break;

      Function *NewF = Intrinsic::getDeclaration(
          II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
942 Value *Args[] = {SrcLHS, SrcRHS,
943 ConstantInt::get(CC->getType(), SrcPred)};
944 CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
945 NewCall->takeName(&II);
946 return IC.replaceInstUsesWith(II, NewCall);
947 }
948
949 break;
950 }
951 case Intrinsic::amdgcn_mbcnt_hi: {
952 // exec_hi is all 0, so this is just a copy.
953 if (ST->isWave32())
954 return IC.replaceInstUsesWith(II, II.getArgOperand(1));
955 break;
956 }
957 case Intrinsic::amdgcn_ballot: {
    if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
      if (Src->isZero()) {
        // amdgcn.ballot(i1 0) is zero.
        return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
      }
963 }
964 break;
965 }
966 case Intrinsic::amdgcn_wqm_vote: {
967 // wqm_vote is identity when the argument is constant.
968 if (!isa<Constant>(II.getArgOperand(0)))
969 break;
970
971 return IC.replaceInstUsesWith(II, II.getArgOperand(0));
972 }
973 case Intrinsic::amdgcn_kill: {
974 const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
975 if (!C || !C->getZExtValue())
976 break;
977
978 // amdgcn.kill(i1 1) is a no-op
979 return IC.eraseInstFromFunction(II);
980 }
981 case Intrinsic::amdgcn_update_dpp: {
982 Value *Old = II.getArgOperand(0);
983
984 auto *BC = cast<ConstantInt>(II.getArgOperand(5));
985 auto *RM = cast<ConstantInt>(II.getArgOperand(3));
986 auto *BM = cast<ConstantInt>(II.getArgOperand(4));
987 if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
988 BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
989 break;
990
991 // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
992 return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
993 }
994 case Intrinsic::amdgcn_permlane16:
995 case Intrinsic::amdgcn_permlanex16: {
996 // Discard vdst_in if it's not going to be read.
997 Value *VDstIn = II.getArgOperand(0);
998 if (isa<UndefValue>(VDstIn))
999 break;
1000
1001 ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
1002 ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
1003 if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
1004 break;
1005
1006 return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
1007 }
1008 case Intrinsic::amdgcn_permlane64:
1009 // A constant value is trivially uniform.
1010 if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
1011 return IC.replaceInstUsesWith(II, C);
1012 }
1013 break;
1014 case Intrinsic::amdgcn_readfirstlane:
1015 case Intrinsic::amdgcn_readlane: {
1016 // A constant value is trivially uniform.
1017 if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
1018 return IC.replaceInstUsesWith(II, C);
1019 }
1020
1021 // The rest of these may not be safe if the exec may not be the same between
1022 // the def and use.
1023 Value *Src = II.getArgOperand(0);
1024 Instruction *SrcInst = dyn_cast<Instruction>(Src);
1025 if (SrcInst && SrcInst->getParent() != II.getParent())
1026 break;
1027
1028 // readfirstlane (readfirstlane x) -> readfirstlane x
1029 // readlane (readfirstlane x), y -> readfirstlane x
1030 if (match(Src,
1031 PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
1032 return IC.replaceInstUsesWith(II, Src);
1033 }
1034
1035 if (IID == Intrinsic::amdgcn_readfirstlane) {
1036 // readfirstlane (readlane x, y) -> readlane x, y
1037 if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
1038 return IC.replaceInstUsesWith(II, Src);
1039 }
1040 } else {
      // readlane (readlane x, y), y -> readlane x, y
      if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
                         PatternMatch::m_Value(),
                         PatternMatch::m_Specific(II.getArgOperand(1))))) {
        return IC.replaceInstUsesWith(II, Src);
      }
    }
1048
1049 break;
1050 }
1051 case Intrinsic::amdgcn_fmul_legacy: {
1052 Value *Op0 = II.getArgOperand(0);
1053 Value *Op1 = II.getArgOperand(1);
1054
1055 // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
1056 // infinity, gives +0.0.
1057 // TODO: Move to InstSimplify?
    if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
        match(Op1, PatternMatch::m_AnyZeroFP()))
      return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));
1061
1062 // If we can prove we don't have one of the special cases then we can use a
1063 // normal fmul instruction instead.
1064 if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
1065 auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
1066 FMul->takeName(&II);
1067 return IC.replaceInstUsesWith(II, FMul);
1068 }
1069 break;
1070 }
1071 case Intrinsic::amdgcn_fma_legacy: {
1072 Value *Op0 = II.getArgOperand(0);
1073 Value *Op1 = II.getArgOperand(1);
1074 Value *Op2 = II.getArgOperand(2);
1075
1076 // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
1077 // infinity, gives +0.0.
1078 // TODO: Move to InstSimplify?
    if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
        match(Op1, PatternMatch::m_AnyZeroFP())) {
      // It's tempting to just return Op2 here, but that would give the wrong
1082 // result if Op2 was -0.0.
1083 auto *Zero = ConstantFP::getZero(II.getType());
1084 auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
1085 FAdd->takeName(&II);
1086 return IC.replaceInstUsesWith(II, FAdd);
1087 }
1088
1089 // If we can prove we don't have one of the special cases then we can use a
1090 // normal fma instead.
    if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
      II.setCalledOperand(Intrinsic::getDeclaration(
          II.getModule(), Intrinsic::fma, II.getType()));
      return &II;
1095 }
1096 break;
1097 }
1098 case Intrinsic::amdgcn_is_shared:
1099 case Intrinsic::amdgcn_is_private: {
1100 if (isa<UndefValue>(II.getArgOperand(0)))
1101 return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
1102
    if (isa<ConstantPointerNull>(II.getArgOperand(0)))
      return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
    break;
1106 }
1107 case Intrinsic::amdgcn_buffer_store_format:
1108 case Intrinsic::amdgcn_raw_buffer_store_format:
1109 case Intrinsic::amdgcn_struct_buffer_store_format:
1110 case Intrinsic::amdgcn_raw_tbuffer_store:
1111 case Intrinsic::amdgcn_struct_tbuffer_store:
1112 case Intrinsic::amdgcn_tbuffer_store:
1113 case Intrinsic::amdgcn_image_store_1d:
1114 case Intrinsic::amdgcn_image_store_1darray:
1115 case Intrinsic::amdgcn_image_store_2d:
1116 case Intrinsic::amdgcn_image_store_2darray:
1117 case Intrinsic::amdgcn_image_store_2darraymsaa:
1118 case Intrinsic::amdgcn_image_store_2dmsaa:
1119 case Intrinsic::amdgcn_image_store_3d:
1120 case Intrinsic::amdgcn_image_store_cube:
1121 case Intrinsic::amdgcn_image_store_mip_1d:
1122 case Intrinsic::amdgcn_image_store_mip_1darray:
1123 case Intrinsic::amdgcn_image_store_mip_2d:
1124 case Intrinsic::amdgcn_image_store_mip_2darray:
1125 case Intrinsic::amdgcn_image_store_mip_3d:
1126 case Intrinsic::amdgcn_image_store_mip_cube: {
1127 if (!isa<FixedVectorType>(II.getArgOperand(0)->getType()))
1128 break;
1129
    APInt DemandedElts =
        trimTrailingZerosInVector(IC, II.getArgOperand(0), &II);

1133 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
1134 if (simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, DMaskIdx,
1135 false)) {
1136 return IC.eraseInstFromFunction(II);
1137 }
1138
1139 break;
1140 }
1141 }
  if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
          AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
    return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
  }
1146 return std::nullopt;
1147}
1148
/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
///
/// The result of simplifying amdgcn image and buffer store intrinsics is
/// updating definitions of the intrinsic's vector argument, not Uses of the
/// result like image and buffer loads.
/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
/// struct returns.
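/// For example (illustrative), a raw.buffer.load returning <4 x float> whose
/// last two elements are never used can be shrunk to a <2 x float> load
/// followed by a shufflevector that rebuilds the original vector shape.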
static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
                                                     IntrinsicInst &II,
                                                     APInt DemandedElts,
                                                     int DMaskIdx, bool IsLoad) {
1160
1161 auto *IIVTy = cast<FixedVectorType>(IsLoad ? II.getType()
1162 : II.getOperand(0)->getType());
1163 unsigned VWidth = IIVTy->getNumElements();
1164 if (VWidth == 1)
1165 return nullptr;
  Type *EltTy = IIVTy->getElementType();

  IRBuilderBase::InsertPointGuard Guard(IC.Builder);
  IC.Builder.SetInsertPoint(&II);

1170
1171 // Assume the arguments are unchanged and later override them, if needed.
1172 SmallVector<Value *, 16> Args(II.args());
1173
1174 if (DMaskIdx < 0) {
1175 // Buffer case.
1176
1177 const unsigned ActiveBits = DemandedElts.getActiveBits();
1178 const unsigned UnusedComponentsAtFront = DemandedElts.countr_zero();
1179
1180 // Start assuming the prefix of elements is demanded, but possibly clear
1181 // some other bits if there are trailing zeros (unused components at front)
1182 // and update offset.
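    // Illustrative example (not from the original source): if only elements
    // 2 and 3 of a <4 x float> raw.buffer.load are used, the two unused front
    // components are dropped and the byte offset is advanced by
    // 2 * sizeof(float) = 8.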
1183 DemandedElts = (1 << ActiveBits) - 1;
1184
1185 if (UnusedComponentsAtFront > 0) {
1186 static const unsigned InvalidOffsetIdx = 0xf;
1187
1188 unsigned OffsetIdx;
1189 switch (II.getIntrinsicID()) {
1190 case Intrinsic::amdgcn_raw_buffer_load:
1191 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1192 OffsetIdx = 1;
1193 break;
1194 case Intrinsic::amdgcn_s_buffer_load:
1195 // If resulting type is vec3, there is no point in trimming the
1196 // load with updated offset, as the vec3 would most likely be widened to
1197 // vec4 anyway during lowering.
1198 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1199 OffsetIdx = InvalidOffsetIdx;
1200 else
1201 OffsetIdx = 1;
1202 break;
1203 case Intrinsic::amdgcn_struct_buffer_load:
1204 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1205 OffsetIdx = 2;
1206 break;
1207 default:
1208 // TODO: handle tbuffer* intrinsics.
1209 OffsetIdx = InvalidOffsetIdx;
1210 break;
1211 }
1212
1213 if (OffsetIdx != InvalidOffsetIdx) {
1214 // Clear demanded bits and update the offset.
1215 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1216 auto *Offset = Args[OffsetIdx];
1217 unsigned SingleComponentSizeInBits =
1218 IC.getDataLayout().getTypeSizeInBits(EltTy);
1219 unsigned OffsetAdd =
1220 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1221 auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
1222 Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
1223 }
1224 }
1225 } else {
1226 // Image case.
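    // Illustrative example (not from the original source): with dmask = 0b1011
    // the instruction returns channels 0, 1 and 3 as vector elements 0, 1 and
    // 2; if only elements 0 and 1 are demanded, the new dmask becomes 0b0011.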
1227
1228 ConstantInt *DMask = cast<ConstantInt>(Args[DMaskIdx]);
1229 unsigned DMaskVal = DMask->getZExtValue() & 0xf;
1230
1231 // Mask off values that are undefined because the dmask doesn't cover them
1232 DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;
1233
1234 unsigned NewDMaskVal = 0;
1235 unsigned OrigLdStIdx = 0;
1236 for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
1237 const unsigned Bit = 1 << SrcIdx;
1238 if (!!(DMaskVal & Bit)) {
1239 if (!!DemandedElts[OrigLdStIdx])
1240 NewDMaskVal |= Bit;
1241 OrigLdStIdx++;
1242 }
1243 }
1244
1245 if (DMaskVal != NewDMaskVal)
1246 Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
1247 }
1248
1249 unsigned NewNumElts = DemandedElts.popcount();
1250 if (!NewNumElts)
1251 return UndefValue::get(IIVTy);
1252
1253 if (NewNumElts >= VWidth && DemandedElts.isMask()) {
1254 if (DMaskIdx >= 0)
1255 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1256 return nullptr;
1257 }
1258
1259 // Validate function argument and return types, extracting overloaded types
1260 // along the way.
  SmallVector<Type *, 6> OverloadTys;
  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
    return nullptr;
1264
1265 Type *NewTy =
1266 (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
1267 OverloadTys[0] = NewTy;
1268
1269 if (!IsLoad) {
1270 SmallVector<int, 8> EltMask;
1271 for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
1272 if (DemandedElts[OrigStoreIdx])
1273 EltMask.push_back(OrigStoreIdx);
1274
1275 if (NewNumElts == 1)
1276 Args[0] = IC.Builder.CreateExtractElement(II.getOperand(0), EltMask[0]);
1277 else
1278 Args[0] = IC.Builder.CreateShuffleVector(II.getOperand(0), EltMask);
1279 }
1280
1282 II.getModule(), II.getIntrinsicID(), OverloadTys);
1283 CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
1284 NewCall->takeName(&II);
1285 NewCall->copyMetadata(II);
1286
1287 if (IsLoad) {
1288 if (NewNumElts == 1) {
1289 return IC.Builder.CreateInsertElement(UndefValue::get(IIVTy), NewCall,
1290 DemandedElts.countr_zero());
1291 }
1292
1293 SmallVector<int, 8> EltMask;
1294 unsigned NewLoadIdx = 0;
1295 for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1296 if (!!DemandedElts[OrigLoadIdx])
1297 EltMask.push_back(NewLoadIdx++);
1298 else
1299 EltMask.push_back(NewNumElts);
1300 }
1301
1302 auto *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
1303
1304 return Shuffle;
1305 }
1306
1307 return NewCall;
1308}

std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
1313 std::function<void(Instruction *, unsigned, APInt, APInt &)>
1314 SimplifyAndSetOp) const {
1315 switch (II.getIntrinsicID()) {
1316 case Intrinsic::amdgcn_buffer_load:
1317 case Intrinsic::amdgcn_buffer_load_format:
1318 case Intrinsic::amdgcn_raw_buffer_load:
1319 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1320 case Intrinsic::amdgcn_raw_buffer_load_format:
1321 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
1322 case Intrinsic::amdgcn_raw_tbuffer_load:
1323 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
1324 case Intrinsic::amdgcn_s_buffer_load:
1325 case Intrinsic::amdgcn_struct_buffer_load:
1326 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1327 case Intrinsic::amdgcn_struct_buffer_load_format:
1328 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
1329 case Intrinsic::amdgcn_struct_tbuffer_load:
1330 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
1331 case Intrinsic::amdgcn_tbuffer_load:
1332 return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
1333 default: {
1334 if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
1335 return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
1336 }
1337 break;
1338 }
1339 }
1340 return std::nullopt;
1341}