AMDGPUInstCombineIntrinsic.cpp
1 //===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // This file implements a TargetTransformInfo analysis pass specific to the
11 // AMDGPU target machine. It uses the target's detailed information to provide
12 // more precise answers to certain TTI queries, while letting the target
13 // independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "AMDGPUInstrInfo.h"
18 #include "AMDGPUTargetTransformInfo.h"
19 #include "GCNSubtarget.h"
20 #include "llvm/IR/IntrinsicsAMDGPU.h"
21 #include "llvm/Transforms/InstCombine/InstCombiner.h"
22 
23 using namespace llvm;
24 
25 #define DEBUG_TYPE "AMDGPUtti"
26 
27 namespace {
28 
29 struct AMDGPUImageDMaskIntrinsic {
30  unsigned Intr;
31 };
32 
33 #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
34 #include "InstCombineTables.inc"
35 
36 } // end anonymous namespace
37 
38 // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
39 //
40 // A single NaN input is folded to minnum, so we rely on that folding for
41 // handling NaNs.
42 static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
43  const APFloat &Src2) {
44  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
45 
46  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
47  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
48  if (Cmp0 == APFloat::cmpEqual)
49  return maxnum(Src1, Src2);
50 
51  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
52  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
53  if (Cmp1 == APFloat::cmpEqual)
54  return maxnum(Src0, Src2);
55 
56  return maxnum(Src0, Src1);
57 }
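// Worked example (values chosen purely for illustration): for
// fmed3(1.0, 4.0, 2.0), Max3 = maxnum(maxnum(1.0, 4.0), 2.0) = 4.0, which
// compares equal to Src1, so the result is maxnum(Src0, Src2) =
// maxnum(1.0, 2.0) = 2.0, i.e. the median of the three inputs.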
58 
59 // Check if a value can be converted to a 16-bit value without losing
60 // precision.
61 // The value is expected to be either a float (IsFloat = true) or an unsigned
62 // integer (IsFloat = false).
63 static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
64  Type *VTy = V.getType();
65  if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
66  // The value is already 16-bit, so we don't want to convert to 16-bit again!
67  return false;
68  }
69  if (IsFloat) {
70  if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
71  // We need to check that if we cast the index down to a half, we do not
72  // lose precision.
73  APFloat FloatValue(ConstFloat->getValueAPF());
74  bool LosesInfo = true;
75  FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero,
76  &LosesInfo);
77  return !LosesInfo;
78  }
79  } else {
80  if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
81  // We need to check that if we cast the index down to an i16, we do not
82  // lose precision.
83  APInt IntValue(ConstInt->getValue());
84  return IntValue.getActiveBits() <= 16;
85  }
86  }
87 
88  Value *CastSrc;
89  bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc)))
90  : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)));
91  if (IsExt) {
92  Type *CastSrcTy = CastSrc->getType();
93  if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
94  return true;
95  }
96 
97  return false;
98 }
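// For example, a value defined by 'fpext half %x to float' or
// 'zext i16 %x to i32' can be narrowed back to 16 bits, as can a constant
// like 0.5 that converts to half exactly; a constant such as 1.0e10 would
// lose information in the conversion and is rejected.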
99 
100 // Convert a value to 16-bit.
101 static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
102  Type *VTy = V.getType();
103  if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
104  return cast<Instruction>(&V)->getOperand(0);
105  if (VTy->isIntegerTy())
106  return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
107  if (VTy->isFloatingPointTy())
108  return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
109 
110  llvm_unreachable("Should never be called!");
111 }
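// A minimal sketch of the two cast paths above (illustrative IR only, value
// names hypothetical):
//   %s = trunc i32 %i to i16       ; produced by CreateIntCast on integers
//   %h = fptrunc float %f to half  ; produced by CreateFPCast on FP values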
112 
113 /// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with
114 /// modified arguments (based on OldIntr) and replaces InstToReplace with
115 /// this newly created intrinsic call.
116 static Optional<Instruction *> modifyIntrinsicCall(
117  IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
118  InstCombiner &IC,
119  std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
120  Func) {
121  SmallVector<Type *, 4> ArgTys;
122  if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
123  return None;
124 
125  SmallVector<Value *, 8> Args(OldIntr.args());
126 
127  // Modify arguments and types
128  Func(Args, ArgTys);
129 
130  Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys);
131 
132  CallInst *NewCall = IC.Builder.CreateCall(I, Args);
133  NewCall->takeName(&OldIntr);
134  NewCall->copyMetadata(OldIntr);
135  if (isa<FPMathOperator>(NewCall))
136  NewCall->copyFastMathFlags(&OldIntr);
137 
138  // Erase and replace uses
139  if (!InstToReplace.getType()->isVoidTy())
140  IC.replaceInstUsesWith(InstToReplace, NewCall);
141 
142  bool RemoveOldIntr = &OldIntr != &InstToReplace;
143 
144  auto RetValue = IC.eraseInstFromFunction(InstToReplace);
145  if (RemoveOldIntr)
146  IC.eraseInstFromFunction(OldIntr);
147 
148  return RetValue;
149 }
150 
151 static Optional<Instruction *>
152 simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
153  const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
154  IntrinsicInst &II, InstCombiner &IC) {
155  // Optimize _L to _LZ when _L is zero
156  if (const auto *LZMappingInfo =
157  AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
158  if (auto *ConstantLod =
159  dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
160  if (ConstantLod->isZero() || ConstantLod->isNegative()) {
161  const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
162  AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
163  ImageDimIntr->Dim);
164  return modifyIntrinsicCall(
165  II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
166  Args.erase(Args.begin() + ImageDimIntr->LodIndex);
167  });
168  }
169  }
170  }
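// Illustrative IR for the _L to _LZ rewrite (operand lists abbreviated):
//   call @llvm.amdgcn.image.sample.l.2d...(..., float 0.0 /*lod*/, ...)
// becomes, with the constant-zero lod operand dropped:
//   call @llvm.amdgcn.image.sample.lz.2d...(...)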
171 
172  // Optimize _mip away, when 'lod' is zero
173  if (const auto *MIPMappingInfo =
174  AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
175  if (auto *ConstantMip =
176  dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
177  if (ConstantMip->isZero()) {
178  const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
179  AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
180  ImageDimIntr->Dim);
181  return modifyIntrinsicCall(
182  II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
183  Args.erase(Args.begin() + ImageDimIntr->MipIndex);
184  });
185  }
186  }
187  }
188 
189  // Optimize _bias away when 'bias' is zero
190  if (const auto *BiasMappingInfo =
191  AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
192  if (auto *ConstantBias =
193  dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
194  if (ConstantBias->isZero()) {
195  const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
196  AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
197  ImageDimIntr->Dim);
198  return modifyIntrinsicCall(
199  II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
200  Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
201  ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
202  });
203  }
204  }
205  }
206 
207  // Optimize _offset away when 'offset' is zero
208  if (const auto *OffsetMappingInfo =
209  AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
210  if (auto *ConstantOffset =
211  dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
212  if (ConstantOffset->isZero()) {
213  const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
214  AMDGPU::getImageDimIntrinsicByBaseOpcode(
215  OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
216  return modifyIntrinsicCall(
217  II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
218  Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
219  });
220  }
221  }
222  }
223 
224  // Try to use D16
225  if (ST->hasD16Images()) {
226 
227  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
228  AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
229 
230  if (BaseOpcode->HasD16) {
231 
232  // If the only use of image intrinsic is a fptrunc (with conversion to
233  // half) then both fptrunc and image intrinsic will be replaced with image
234  // intrinsic with D16 flag.
235  if (II.hasOneUse()) {
236  Instruction *User = II.user_back();
237 
238  if (User->getOpcode() == Instruction::FPTrunc &&
239  User->getType()->getScalarType()->isHalfTy()) {
240 
241  return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
242  [&](auto &Args, auto &ArgTys) {
243  // Change return type of image intrinsic.
244  // Set it to return type of fptrunc.
245  ArgTys[0] = User->getType();
246  });
247  }
248  }
249  }
250  }
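// Illustrative IR for the D16 fold (operand lists abbreviated): the pair
//   %v = call <4 x float> @llvm.amdgcn.image.sample...(...)
//   %h = fptrunc <4 x float> %v to <4 x half>
// collapses into a single call whose overloaded return type is <4 x half>.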
251 
252  // Try to use A16 or G16
253  if (!ST->hasA16() && !ST->hasG16())
254  return None;
255 
256  // Address is interpreted as float if the instruction has a sampler or as
257  // unsigned int if there is no sampler.
258  bool HasSampler =
259  AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
260  bool FloatCoord = false;
261  // true means derivatives can be converted to 16 bit, coordinates not
262  bool OnlyDerivatives = false;
263 
264  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
265  OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
266  Value *Coord = II.getOperand(OperandIndex);
267  // If the values are not derived from 16-bit values, we cannot optimize.
268  if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
269  if (OperandIndex < ImageDimIntr->CoordStart ||
270  ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
271  return None;
272  }
273  // All gradients can be converted, so convert only them
274  OnlyDerivatives = true;
275  break;
276  }
277 
278  assert(OperandIndex == ImageDimIntr->GradientStart ||
279  FloatCoord == Coord->getType()->isFloatingPointTy());
280  FloatCoord = Coord->getType()->isFloatingPointTy();
281  }
282 
283  if (!OnlyDerivatives && !ST->hasA16())
284  OnlyDerivatives = true; // Only supports G16
285 
286  // Check if there is a bias parameter and if it can be converted to f16
287  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
288  Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
289  assert(HasSampler &&
290  "Only image instructions with a sampler can have a bias");
291  if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
292  OnlyDerivatives = true;
293  }
294 
295  if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
296  ImageDimIntr->CoordStart))
297  return None;
298 
299  Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
300  : Type::getInt16Ty(II.getContext());
301 
302  return modifyIntrinsicCall(
303  II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
304  ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
305  if (!OnlyDerivatives) {
306  ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
307 
308  // Change the bias type
309  if (ImageDimIntr->NumBiasArgs != 0)
310  ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
311  }
312 
313  unsigned EndIndex =
314  OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
315  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
316  OperandIndex < EndIndex; OperandIndex++) {
317  Args[OperandIndex] =
318  convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
319  }
320 
321  // Convert the bias
322  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
323  Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
324  Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
325  }
326  });
327 }
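// For example, a sample whose coordinates are all 'fpext half' values can be
// rewritten to take the half values directly (A16); when only the gradients
// qualify, or the target lacks A16 but has G16, just the gradient operands
// are narrowed.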
328 
329 bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
330  InstCombiner &IC) const {
331  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
332  // infinity, gives +0.0. If we can prove we don't have one of the special
333  // cases then we can use a normal multiply instead.
334  // TODO: Create and use isKnownFiniteNonZero instead of just matching
335  // constants here.
336  if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
337  match(Op1, PatternMatch::m_FiniteNonZero())) {
338  // One operand is not zero or infinity or NaN.
339  return true;
340  }
341  auto *TLI = &IC.getTargetLibraryInfo();
342  if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
343  isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
344  // Neither operand is infinity or NaN.
345  return true;
346  }
347  return false;
348 }
349 
350 Optional<Instruction *>
351 GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
352  Intrinsic::ID IID = II.getIntrinsicID();
353  switch (IID) {
354  case Intrinsic::amdgcn_rcp: {
355  Value *Src = II.getArgOperand(0);
356 
357  // TODO: Move to ConstantFolding/InstSimplify?
358  if (isa<UndefValue>(Src)) {
359  Type *Ty = II.getType();
360  auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
361  return IC.replaceInstUsesWith(II, QNaN);
362  }
363 
364  if (II.isStrictFP())
365  break;
366 
367  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
368  const APFloat &ArgVal = C->getValueAPF();
369  APFloat Val(ArgVal.getSemantics(), 1);
370  Val.divide(ArgVal, APFloat::rmNearestTiesToEven);
371 
372  // This is more precise than the instruction may give.
373  //
374  // TODO: The instruction always flushes denormal results (except for f16),
375  // should this also?
376  return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
377  }
378 
379  break;
380  }
381  case Intrinsic::amdgcn_rsq: {
382  Value *Src = II.getArgOperand(0);
383 
384  // TODO: Move to ConstantFolding/InstSimplify?
385  if (isa<UndefValue>(Src)) {
386  Type *Ty = II.getType();
387  auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
388  return IC.replaceInstUsesWith(II, QNaN);
389  }
390 
391  break;
392  }
393  case Intrinsic::amdgcn_frexp_mant:
394  case Intrinsic::amdgcn_frexp_exp: {
395  Value *Src = II.getArgOperand(0);
396  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
397  int Exp;
398  APFloat Significand =
399  frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);
400 
401  if (IID == Intrinsic::amdgcn_frexp_mant) {
402  return IC.replaceInstUsesWith(
403  II, ConstantFP::get(II.getContext(), Significand));
404  }
405 
406  // Match instruction special case behavior.
407  if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
408  Exp = 0;
409 
410  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
411  }
412 
413  if (isa<UndefValue>(Src)) {
414  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
415  }
416 
417  break;
418  }
419  case Intrinsic::amdgcn_class: {
420  enum {
421  S_NAN = 1 << 0, // Signaling NaN
422  Q_NAN = 1 << 1, // Quiet NaN
423  N_INFINITY = 1 << 2, // Negative infinity
424  N_NORMAL = 1 << 3, // Negative normal
425  N_SUBNORMAL = 1 << 4, // Negative subnormal
426  N_ZERO = 1 << 5, // Negative zero
427  P_ZERO = 1 << 6, // Positive zero
428  P_SUBNORMAL = 1 << 7, // Positive subnormal
429  P_NORMAL = 1 << 8, // Positive normal
430  P_INFINITY = 1 << 9 // Positive infinity
431  };
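// For example, a mask of S_NAN | Q_NAN (0x3) asks "is NaN" and
// N_ZERO | P_ZERO (0x60) asks "is zero"; both are rewritten to plain fcmp
// instructions below.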
432 
433  const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
434  N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL |
435  P_NORMAL | P_INFINITY;
436 
437  Value *Src0 = II.getArgOperand(0);
438  Value *Src1 = II.getArgOperand(1);
439  const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
440  if (!CMask) {
441  if (isa<UndefValue>(Src0)) {
442  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
443  }
444 
445  if (isa<UndefValue>(Src1)) {
446  return IC.replaceInstUsesWith(II,
447  ConstantInt::get(II.getType(), false));
448  }
449  break;
450  }
451 
452  uint32_t Mask = CMask->getZExtValue();
453 
454  // If all tests are made, it doesn't matter what the value is.
455  if ((Mask & FullMask) == FullMask) {
456  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
457  }
458 
459  if ((Mask & FullMask) == 0) {
460  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
461  }
462 
463  if (Mask == (S_NAN | Q_NAN)) {
464  // Equivalent of isnan. Replace with standard fcmp.
465  Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0);
466  FCmp->takeName(&II);
467  return IC.replaceInstUsesWith(II, FCmp);
468  }
469 
470  if (Mask == (N_ZERO | P_ZERO)) {
471  // Equivalent of == 0.
472  Value *FCmp =
473  IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0));
474 
475  FCmp->takeName(&II);
476  return IC.replaceInstUsesWith(II, FCmp);
477  }
478 
479  // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
480  if (((Mask & S_NAN) || (Mask & Q_NAN)) &&
481  isKnownNeverNaN(Src0, &IC.getTargetLibraryInfo())) {
482  return IC.replaceOperand(
483  II, 1, ConstantInt::get(Src1->getType(), Mask & ~(S_NAN | Q_NAN)));
484  }
485 
486  const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
487  if (!CVal) {
488  if (isa<UndefValue>(Src0)) {
489  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
490  }
491 
492  // Clamp mask to used bits
493  if ((Mask & FullMask) != Mask) {
494  CallInst *NewCall = IC.Builder.CreateCall(
495  II.getCalledFunction(),
496  {Src0, ConstantInt::get(Src1->getType(), Mask & FullMask)});
497 
498  NewCall->takeName(&II);
499  return IC.replaceInstUsesWith(II, NewCall);
500  }
501 
502  break;
503  }
504 
505  const APFloat &Val = CVal->getValueAPF();
506 
507  bool Result =
508  ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
509  ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
510  ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
511  ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
512  ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
513  ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
514  ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
515  ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
516  ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
517  ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
518 
519  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
520  }
521  case Intrinsic::amdgcn_cvt_pkrtz: {
522  Value *Src0 = II.getArgOperand(0);
523  Value *Src1 = II.getArgOperand(1);
524  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
525  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
526  const fltSemantics &HalfSem =
527  II.getType()->getScalarType()->getFltSemantics();
528  bool LosesInfo;
529  APFloat Val0 = C0->getValueAPF();
530  APFloat Val1 = C1->getValueAPF();
531  Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
532  Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
533 
534  Constant *Folded =
535  ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
536  ConstantFP::get(II.getContext(), Val1)});
537  return IC.replaceInstUsesWith(II, Folded);
538  }
539  }
540 
541  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
542  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
543  }
544 
545  break;
546  }
547  case Intrinsic::amdgcn_cvt_pknorm_i16:
548  case Intrinsic::amdgcn_cvt_pknorm_u16:
549  case Intrinsic::amdgcn_cvt_pk_i16:
550  case Intrinsic::amdgcn_cvt_pk_u16: {
551  Value *Src0 = II.getArgOperand(0);
552  Value *Src1 = II.getArgOperand(1);
553 
554  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
555  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
556  }
557 
558  break;
559  }
560  case Intrinsic::amdgcn_ubfe:
561  case Intrinsic::amdgcn_sbfe: {
562  // Decompose simple cases into standard shifts.
563  Value *Src = II.getArgOperand(0);
564  if (isa<UndefValue>(Src)) {
565  return IC.replaceInstUsesWith(II, Src);
566  }
567 
568  unsigned Width;
569  Type *Ty = II.getType();
570  unsigned IntSize = Ty->getIntegerBitWidth();
571 
572  ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
573  if (CWidth) {
574  Width = CWidth->getZExtValue();
575  if ((Width & (IntSize - 1)) == 0) {
576  return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
577  }
578 
579  // Hardware ignores high bits, so remove those.
580  if (Width >= IntSize) {
581  return IC.replaceOperand(
582  II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
583  }
584  }
585 
586  unsigned Offset;
587  ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
588  if (COffset) {
589  Offset = COffset->getZExtValue();
590  if (Offset >= IntSize) {
591  return IC.replaceOperand(
592  II, 1,
593  ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
594  }
595  }
596 
597  bool Signed = IID == Intrinsic::amdgcn_sbfe;
598 
599  if (!CWidth || !COffset)
600  break;
601 
602  // The case of Width == 0 is handled above, which makes this transformation
603  // safe. If Width == 0, then the ashr and lshr instructions become poison
604  // value since the shift amount would be equal to the bit size.
605  assert(Width != 0);
606 
607  // TODO: This allows folding to undef when the hardware has specific
608  // behavior?
609  if (Offset + Width < IntSize) {
610  Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
611  Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
612  : IC.Builder.CreateLShr(Shl, IntSize - Width);
613  RightShift->takeName(&II);
614  return IC.replaceInstUsesWith(II, RightShift);
615  }
616 
617  Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
618  : IC.Builder.CreateLShr(Src, Offset);
619 
620  RightShift->takeName(&II);
621  return IC.replaceInstUsesWith(II, RightShift);
622  }
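// Worked example: ubfe(i32 %x, 8, 8) extracts bits 8..15, so with
// Offset + Width = 16 < 32 it becomes (%x << 16) logically shifted right by
// 24; sbfe would use an arithmetic shift to sign-extend the extracted field.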
623  case Intrinsic::amdgcn_exp:
624  case Intrinsic::amdgcn_exp_compr: {
625  ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
626  unsigned EnBits = En->getZExtValue();
627  if (EnBits == 0xf)
628  break; // All inputs enabled.
629 
630  bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
631  bool Changed = false;
632  for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
633  if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
634  (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
635  Value *Src = II.getArgOperand(I + 2);
636  if (!isa<UndefValue>(Src)) {
637  IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
638  Changed = true;
639  }
640  }
641  }
642 
643  if (Changed) {
644  return &II;
645  }
646 
647  break;
648  }
649  case Intrinsic::amdgcn_fmed3: {
650  // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
651  // for the shader.
652 
653  Value *Src0 = II.getArgOperand(0);
654  Value *Src1 = II.getArgOperand(1);
655  Value *Src2 = II.getArgOperand(2);
656 
657  // Checking for NaN before canonicalization provides better fidelity when
658  // mapping other operations onto fmed3 since the order of operands is
659  // unchanged.
660  CallInst *NewCall = nullptr;
661  if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
662  NewCall = IC.Builder.CreateMinNum(Src1, Src2);
663  } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
664  NewCall = IC.Builder.CreateMinNum(Src0, Src2);
665  } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
666  NewCall = IC.Builder.CreateMaxNum(Src0, Src1);
667  }
668 
669  if (NewCall) {
670  NewCall->copyFastMathFlags(&II);
671  NewCall->takeName(&II);
672  return IC.replaceInstUsesWith(II, NewCall);
673  }
674 
675  bool Swap = false;
676  // Canonicalize constants to RHS operands.
677  //
678  // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
679  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
680  std::swap(Src0, Src1);
681  Swap = true;
682  }
683 
684  if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
685  std::swap(Src1, Src2);
686  Swap = true;
687  }
688 
689  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
690  std::swap(Src0, Src1);
691  Swap = true;
692  }
693 
694  if (Swap) {
695  II.setArgOperand(0, Src0);
696  II.setArgOperand(1, Src1);
697  II.setArgOperand(2, Src2);
698  return &II;
699  }
700 
701  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
702  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
703  if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
704  APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
705  C2->getValueAPF());
706  return IC.replaceInstUsesWith(
707  II, ConstantFP::get(IC.Builder.getContext(), Result));
708  }
709  }
710  }
711 
712  break;
713  }
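// For example, fmed3(nan, %a, %b) folds to minnum(%a, %b) above, while a
// fully constant call such as fmed3(1.0, 4.0, 2.0) folds to 2.0 via
// fmed3AMDGCN.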
714  case Intrinsic::amdgcn_icmp:
715  case Intrinsic::amdgcn_fcmp: {
716  const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
717  // Guard against invalid arguments.
718  int64_t CCVal = CC->getZExtValue();
719  bool IsInteger = IID == Intrinsic::amdgcn_icmp;
720  if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
721  CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
722  (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
723  CCVal > CmpInst::LAST_FCMP_PREDICATE)))
724  break;
725 
726  Value *Src0 = II.getArgOperand(0);
727  Value *Src1 = II.getArgOperand(1);
728 
729  if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
730  if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
731  Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
732  if (CCmp->isNullValue()) {
733  return IC.replaceInstUsesWith(
734  II, ConstantExpr::getSExt(CCmp, II.getType()));
735  }
736 
737  // The result of V_ICMP/V_FCMP assembly instructions (which this
738  // intrinsic exposes) is one bit per thread, masked with the EXEC
739  // register (which contains the bitmask of live threads). So a
740  // comparison that always returns true is the same as a read of the
741  // EXEC register.
742  Function *NewF = Intrinsic::getDeclaration(
743  II.getModule(), Intrinsic::read_register, II.getType());
744  Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
745  MDNode *MD = MDNode::get(II.getContext(), MDArgs);
746  Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
747  CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
748  NewCall->addFnAttr(Attribute::Convergent);
749  NewCall->takeName(&II);
750  return IC.replaceInstUsesWith(II, NewCall);
751  }
752 
753  // Canonicalize constants to RHS.
754  CmpInst::Predicate SwapPred =
755  CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
756  II.setArgOperand(0, Src1);
757  II.setArgOperand(1, Src0);
758  II.setArgOperand(
759  2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
760  return &II;
761  }
762 
763  if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
764  break;
765 
766  // Canonicalize compare eq with true value to compare != 0
767  // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
768  // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
769  // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
770  // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
771  Value *ExtSrc;
772  if (CCVal == CmpInst::ICMP_EQ &&
773  ((match(Src1, PatternMatch::m_One()) &&
774  match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
775  (match(Src1, PatternMatch::m_AllOnes()) &&
776  match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
777  ExtSrc->getType()->isIntegerTy(1)) {
778  IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType()));
779  IC.replaceOperand(II, 2,
780  ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
781  return &II;
782  }
783 
784  CmpInst::Predicate SrcPred;
785  Value *SrcLHS;
786  Value *SrcRHS;
787 
788  // Fold compare eq/ne with 0 from a compare result as the predicate to the
789  // intrinsic. The typical use is a wave vote function in the library, which
790  // will be fed from a user code condition compared with 0. Fold in the
791  // redundant compare.
792 
793  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
794  // -> llvm.amdgcn.[if]cmp(a, b, pred)
795  //
796  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
797  // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
798  if (match(Src1, PatternMatch::m_Zero()) &&
799  match(Src0, m_ZExtOrSExt(
800  m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
801  PatternMatch::m_Value(SrcRHS))))) {
802  if (CCVal == CmpInst::ICMP_EQ)
803  SrcPred = CmpInst::getInversePredicate(SrcPred);
804 
805  Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
806  ? Intrinsic::amdgcn_fcmp
807  : Intrinsic::amdgcn_icmp;
808 
809  Type *Ty = SrcLHS->getType();
810  if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
811  // Promote to next legal integer type.
812  unsigned Width = CmpType->getBitWidth();
813  unsigned NewWidth = Width;
814 
815  // Don't do anything for i1 comparisons.
816  if (Width == 1)
817  break;
818 
819  if (Width <= 16)
820  NewWidth = 16;
821  else if (Width <= 32)
822  NewWidth = 32;
823  else if (Width <= 64)
824  NewWidth = 64;
825  else if (Width > 64)
826  break; // Can't handle this.
827 
828  if (Width != NewWidth) {
829  IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
830  if (CmpInst::isSigned(SrcPred)) {
831  SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
832  SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
833  } else {
834  SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
835  SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
836  }
837  }
838  } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
839  break;
840 
841  Function *NewF = Intrinsic::getDeclaration(
842  II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
843  Value *Args[] = {SrcLHS, SrcRHS,
844  ConstantInt::get(CC->getType(), SrcPred)};
845  CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
846  NewCall->takeName(&II);
847  return IC.replaceInstUsesWith(II, NewCall);
848  }
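// Illustrative IR for this fold (names hypothetical; CmpInst predicate
// encodings: 32 = eq, 33 = ne):
//   %c = icmp eq i32 %a, %b
//   %z = zext i1 %c to i32
//   %r = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %z, i32 0, i32 33)
// becomes
//   %r = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)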
849 
850  break;
851  }
852  case Intrinsic::amdgcn_ballot: {
853  if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
854  if (Src->isZero()) {
855  // amdgcn.ballot(i1 0) is zero.
856  return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
857  }
858 
859  if (Src->isOne()) {
860  // amdgcn.ballot(i1 1) is exec.
861  const char *RegName = "exec";
862  if (II.getType()->isIntegerTy(32))
863  RegName = "exec_lo";
864  else if (!II.getType()->isIntegerTy(64))
865  break;
866 
867  Function *NewF = Intrinsic::getDeclaration(
868  II.getModule(), Intrinsic::read_register, II.getType());
869  Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)};
870  MDNode *MD = MDNode::get(II.getContext(), MDArgs);
871  Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
872  CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
873  NewCall->addFnAttr(Attribute::Convergent);
874  NewCall->takeName(&II);
875  return IC.replaceInstUsesWith(II, NewCall);
876  }
877  }
878  break;
879  }
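// For example, on a wave64 target 'call i64 @llvm.amdgcn.ballot.i64(i1 true)'
// becomes a convergent read of the exec register via llvm.read_register.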
880  case Intrinsic::amdgcn_wqm_vote: {
881  // wqm_vote is identity when the argument is constant.
882  if (!isa<Constant>(II.getArgOperand(0)))
883  break;
884 
885  return IC.replaceInstUsesWith(II, II.getArgOperand(0));
886  }
887  case Intrinsic::amdgcn_kill: {
888  const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
889  if (!C || !C->getZExtValue())
890  break;
891 
892  // amdgcn.kill(i1 1) is a no-op
893  return IC.eraseInstFromFunction(II);
894  }
895  case Intrinsic::amdgcn_update_dpp: {
896  Value *Old = II.getArgOperand(0);
897 
898  auto *BC = cast<ConstantInt>(II.getArgOperand(5));
899  auto *RM = cast<ConstantInt>(II.getArgOperand(3));
900  auto *BM = cast<ConstantInt>(II.getArgOperand(4));
901  if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
902  BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
903  break;
904 
905  // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
906  return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
907  }
908  case Intrinsic::amdgcn_permlane16:
909  case Intrinsic::amdgcn_permlanex16: {
910  // Discard vdst_in if it's not going to be read.
911  Value *VDstIn = II.getArgOperand(0);
912  if (isa<UndefValue>(VDstIn))
913  break;
914 
915  ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
916  ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
917  if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
918  break;
919 
920  return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
921  }
922  case Intrinsic::amdgcn_readfirstlane:
923  case Intrinsic::amdgcn_readlane: {
924  // A constant value is trivially uniform.
925  if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
926  return IC.replaceInstUsesWith(II, C);
927  }
928 
929  // The rest of these may not be safe if the exec may not be the same between
930  // the def and use.
931  Value *Src = II.getArgOperand(0);
932  Instruction *SrcInst = dyn_cast<Instruction>(Src);
933  if (SrcInst && SrcInst->getParent() != II.getParent())
934  break;
935 
936  // readfirstlane (readfirstlane x) -> readfirstlane x
937  // readlane (readfirstlane x), y -> readfirstlane x
938  if (match(Src,
939  PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
940  return IC.replaceInstUsesWith(II, Src);
941  }
942 
943  if (IID == Intrinsic::amdgcn_readfirstlane) {
944  // readfirstlane (readlane x, y) -> readlane x, y
945  if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
946  return IC.replaceInstUsesWith(II, Src);
947  }
948  } else {
949  // readlane (readlane x, y), y -> readlane x, y
950  if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
951  PatternMatch::m_Value(),
952  PatternMatch::m_Specific(II.getArgOperand(1))))) {
953  return IC.replaceInstUsesWith(II, Src);
954  }
955  }
956 
957  break;
958  }
959  case Intrinsic::amdgcn_ldexp: {
960  // FIXME: This doesn't introduce new instructions and belongs in
961  // InstructionSimplify.
962  Type *Ty = II.getType();
963  Value *Op0 = II.getArgOperand(0);
964  Value *Op1 = II.getArgOperand(1);
965 
966  // Folding undef to qnan is safe regardless of the FP mode.
967  if (isa<UndefValue>(Op0)) {
968  auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
969  return IC.replaceInstUsesWith(II, QNaN);
970  }
971 
972  const APFloat *C = nullptr;
973  match(Op0, PatternMatch::m_APFloat(C));
974 
975  // FIXME: Should flush denorms depending on FP mode, but that's ignored
976  // everywhere else.
977  //
978  // These cases should be safe, even with strictfp.
979  // ldexp(0.0, x) -> 0.0
980  // ldexp(-0.0, x) -> -0.0
981  // ldexp(inf, x) -> inf
982  // ldexp(-inf, x) -> -inf
983  if (C && (C->isZero() || C->isInfinity())) {
984  return IC.replaceInstUsesWith(II, Op0);
985  }
986 
987  // With strictfp, be more careful about possibly needing to flush denormals
988  // or not, and snan behavior depends on ieee_mode.
989  if (II.isStrictFP())
990  break;
991 
992  if (C && C->isNaN()) {
993  // FIXME: We just need to make the nan quiet here, but that's unavailable
994  // on APFloat, only IEEEfloat
995  auto *Quieted =
996  ConstantFP::get(Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven));
997  return IC.replaceInstUsesWith(II, Quieted);
998  }
999 
1000  // ldexp(x, 0) -> x
1001  // ldexp(x, undef) -> x
1002  if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
1003  return IC.replaceInstUsesWith(II, Op0);
1004  }
1005 
1006  break;
1007  }
1008  case Intrinsic::amdgcn_fmul_legacy: {
1009  Value *Op0 = II.getArgOperand(0);
1010  Value *Op1 = II.getArgOperand(1);
1011 
1012  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
1013  // infinity, gives +0.0.
1014  // TODO: Move to InstSimplify?
1015  if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
1016  match(Op1, PatternMatch::m_AnyZeroFP()))
1017  return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType()));
1018 
1019  // If we can prove we don't have one of the special cases then we can use a
1020  // normal fmul instruction instead.
1021  if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
1022  auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
1023  FMul->takeName(&II);
1024  return IC.replaceInstUsesWith(II, FMul);
1025  }
1026  break;
1027  }
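// For example, fmul_legacy(%x, 2.0) becomes a plain fmul: 2.0 is a finite
// non-zero constant, so the "+/-0.0 times inf/nan" special case cannot fire.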
1028  case Intrinsic::amdgcn_fma_legacy: {
1029  Value *Op0 = II.getArgOperand(0);
1030  Value *Op1 = II.getArgOperand(1);
1031  Value *Op2 = II.getArgOperand(2);
1032 
1033  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
1034  // infinity, gives +0.0.
1035  // TODO: Move to InstSimplify?
1036  if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
1037  match(Op1, PatternMatch::m_AnyZeroFP())) {
1038  // It's tempting to just return Op2 here, but that would give the wrong
1039  // result if Op2 was -0.0.
1040  auto *Zero = ConstantFP::getNullValue(II.getType());
1041  auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
1042  FAdd->takeName(&II);
1043  return IC.replaceInstUsesWith(II, FAdd);
1044  }
1045 
1046  // If we can prove we don't have one of the special cases then we can use a
1047  // normal fma instead.
1048  if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
1049  II.setCalledOperand(Intrinsic::getDeclaration(
1050  II.getModule(), Intrinsic::fma, II.getType()));
1051  return &II;
1052  }
1053  break;
1054  }
1055  case Intrinsic::amdgcn_is_shared:
1056  case Intrinsic::amdgcn_is_private: {
1057  if (isa<UndefValue>(II.getArgOperand(0)))
1058  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
1059 
1060  if (isa<ConstantPointerNull>(II.getArgOperand(0)))
1061  return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
1062  break;
1063  }
1064  default: {
1065  if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
1066  AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
1067  return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
1068  }
1069  }
1070  }
1071  return None;
1072 }
1073 
1074 /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
1075 ///
1076 /// Note: This only supports non-TFE/LWE image intrinsic calls; those have
1077 /// struct returns.
1078 static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
1079  IntrinsicInst &II,
1080  APInt DemandedElts,
1081  int DMaskIdx = -1) {
1082 
1083  auto *IIVTy = cast<FixedVectorType>(II.getType());
1084  unsigned VWidth = IIVTy->getNumElements();
1085  if (VWidth == 1)
1086  return nullptr;
1087 
1088  IRBuilderBase::InsertPointGuard Guard(IC.Builder);
1089  IC.Builder.SetInsertPoint(&II);
1090 
1091  // Assume the arguments are unchanged and later override them, if needed.
1092  SmallVector<Value *, 16> Args(II.args());
1093 
1094  if (DMaskIdx < 0) {
1095  // Buffer case.
1096 
1097  const unsigned ActiveBits = DemandedElts.getActiveBits();
1098  const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
1099 
1100  // Start assuming the prefix of elements is demanded, but possibly clear
1101  // some other bits if there are trailing zeros (unused components at front)
1102  // and update offset.
1103  DemandedElts = (1 << ActiveBits) - 1;
1104 
1105  if (UnusedComponentsAtFront > 0) {
1106  static const unsigned InvalidOffsetIdx = 0xf;
1107 
1108  unsigned OffsetIdx;
1109  switch (II.getIntrinsicID()) {
1110  case Intrinsic::amdgcn_raw_buffer_load:
1111  OffsetIdx = 1;
1112  break;
1113  case Intrinsic::amdgcn_s_buffer_load:
1114  // If resulting type is vec3, there is no point in trimming the
1115  // load with updated offset, as the vec3 would most likely be widened to
1116  // vec4 anyway during lowering.
1117  if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1118  OffsetIdx = InvalidOffsetIdx;
1119  else
1120  OffsetIdx = 1;
1121  break;
1122  case Intrinsic::amdgcn_struct_buffer_load:
1123  OffsetIdx = 2;
1124  break;
1125  default:
1126  // TODO: handle tbuffer* intrinsics.
1127  OffsetIdx = InvalidOffsetIdx;
1128  break;
1129  }
1130 
1131  if (OffsetIdx != InvalidOffsetIdx) {
1132  // Clear demanded bits and update the offset.
1133  DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1134  auto *Offset = II.getArgOperand(OffsetIdx);
1135  unsigned SingleComponentSizeInBits =
1136  IC.getDataLayout().getTypeSizeInBits(II.getType()->getScalarType());
1137  unsigned OffsetAdd =
1138  UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1139  auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
1140  Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
1141  }
1142  }
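// Worked example: a raw.buffer.load returning <4 x float> with only
// elements 2 and 3 demanded (0b1100) drops the two unused leading
// components, bumps the byte offset by 2 * 32 / 8 = 8, and is shrunk to a
// <2 x float> load by the common code below.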
1143  } else {
1144  // Image case.
1145 
1146  ConstantInt *DMask = cast<ConstantInt>(II.getArgOperand(DMaskIdx));
1147  unsigned DMaskVal = DMask->getZExtValue() & 0xf;
1148 
1149  // Mask off values that are undefined because the dmask doesn't cover them
1150  DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;
1151 
1152  unsigned NewDMaskVal = 0;
1153  unsigned OrigLoadIdx = 0;
1154  for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
1155  const unsigned Bit = 1 << SrcIdx;
1156  if (!!(DMaskVal & Bit)) {
1157  if (!!DemandedElts[OrigLoadIdx])
1158  NewDMaskVal |= Bit;
1159  OrigLoadIdx++;
1160  }
1161  }
1162 
1163  if (DMaskVal != NewDMaskVal)
1164  Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
1165  }
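// Worked example: an image load with dmask = 0xf returning <4 x float>
// whose users only read elements 0 and 2 gets dmask 0x5, is rebuilt to
// return <2 x float>, and is widened back with the shufflevector below.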
1166 
1167  unsigned NewNumElts = DemandedElts.countPopulation();
1168  if (!NewNumElts)
1169  return UndefValue::get(II.getType());
1170 
1171  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
1172  if (DMaskIdx >= 0)
1173  II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1174  return nullptr;
1175  }
1176 
1177  // Validate function argument and return types, extracting overloaded types
1178  // along the way.
1179  SmallVector<Type *, 6> OverloadTys;
1180  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
1181  return nullptr;
1182 
1183  Module *M = II.getParent()->getParent()->getParent();
1184  Type *EltTy = IIVTy->getElementType();
1185  Type *NewTy =
1186  (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
1187 
1188  OverloadTys[0] = NewTy;
1189  Function *NewIntrin =
1190  Intrinsic::getDeclaration(M, II.getIntrinsicID(), OverloadTys);
1191 
1192  CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
1193  NewCall->takeName(&II);
1194  NewCall->copyMetadata(II);
1195 
1196  if (NewNumElts == 1) {
1197  return IC.Builder.CreateInsertElement(UndefValue::get(II.getType()),
1198  NewCall,
1199  DemandedElts.countTrailingZeros());
1200  }
1201 
1202  SmallVector<int, 8> EltMask;
1203  unsigned NewLoadIdx = 0;
1204  for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1205  if (!!DemandedElts[OrigLoadIdx])
1206  EltMask.push_back(NewLoadIdx++);
1207  else
1208  EltMask.push_back(NewNumElts);
1209  }
1210 
1211  Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
1212 
1213  return Shuffle;
1214 }
1215 
1216 Optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
1217  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1218  APInt &UndefElts2, APInt &UndefElts3,
1219  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1220  SimplifyAndSetOp) const {
1221  switch (II.getIntrinsicID()) {
1222  case Intrinsic::amdgcn_buffer_load:
1223  case Intrinsic::amdgcn_buffer_load_format:
1224  case Intrinsic::amdgcn_raw_buffer_load:
1225  case Intrinsic::amdgcn_raw_buffer_load_format:
1226  case Intrinsic::amdgcn_raw_tbuffer_load:
1227  case Intrinsic::amdgcn_s_buffer_load:
1228  case Intrinsic::amdgcn_struct_buffer_load:
1229  case Intrinsic::amdgcn_struct_buffer_load_format:
1230  case Intrinsic::amdgcn_struct_tbuffer_load:
1231  case Intrinsic::amdgcn_tbuffer_load:
1232  return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
1233  default: {
1234  if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
1235  return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
1236  }
1237  break;
1238  }
1239  }
1240  return None;
1241 }
llvm::APFloat::isDenormal
bool isDenormal() const
Definition: APFloat.h:1215
llvm::InstCombiner::getTargetLibraryInfo
TargetLibraryInfo & getTargetLibraryInfo() const
Definition: InstCombiner.h:369
llvm::APFloat::isInfinity
bool isInfinity() const
Definition: APFloat.h:1211
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4637
llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:849
llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:179
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:65
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
InstCombiner.h
llvm::RecurKind::FMul
@ FMul
Product of floats.
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1410
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::APFloatBase::IEK_NaN
@ IEK_NaN
Definition: APFloat.h:230
llvm::ConstantInt::getType
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:173
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::APInt::isMask
bool isMask(unsigned numBits) const
Definition: APInt.h:469
llvm::Function
Definition: Function.h:60
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
llvm::IRBuilderBase::CreateFCmpOEQ
Value * CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2106
llvm::PatternMatch::m_NaN
cstfp_pred_ty< is_nan > m_NaN()
Match an arbitrary NaN constant.
Definition: PatternMatch.h:616
llvm::DataLayout::getTypeSizeInBits
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:673
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:309
llvm::ConstantExpr::getSExt
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2134
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::InstCombiner::Builder
BuilderTy & Builder
Definition: InstCombiner.h:58
llvm::Type::getFltSemantics
const fltSemantics & getFltSemantics() const
Definition: Type.cpp:67
llvm::AMDGPU::ImageDimIntrinsicInfo
Definition: AMDGPUInstrInfo.h:47
llvm::IRBuilder< TargetFolder, IRBuilderCallbackInserter >
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:833
llvm::PatternMatch::m_APFloat
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:287
llvm::APFloat::isZero
bool isZero() const
Definition: APFloat.h:1210
llvm::APFloat::divide
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:996
llvm::CallBase::isStrictFP
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1845
llvm::IRBuilderBase::CreateMaxNum
CallInst * CreateMaxNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maxnum intrinsic.
Definition: IRBuilder.h:890
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::IRBuilderBase::CreateShuffleVector
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2313
llvm::APFloatBase::IEK_Inf
@ IEK_Inf
Definition: APFloat.h:231
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:297
llvm::Optional
Definition: APInt.h:33
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:184
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2289
llvm::Instruction::copyMetadata
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Definition: Instruction.cpp:841
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:163
llvm::IRBuilderBase::CreateAShr
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1330
llvm::APFloat::getSemantics
const fltSemantics & getSemantics() const
Definition: APFloat.h:1222
llvm::Intrinsic::getIntrinsicSignature
bool getIntrinsicSignature(Function *F, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type contraints specified by the ....
Definition: Function.cpp:1755
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1289
llvm::APInt::countPopulation
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1583
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
modifyIntrinsicCall
static Optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
Definition: AMDGPUInstCombineIntrinsic.cpp:116
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::InstCombiner::replaceOperand
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:438
llvm::GCNTTIImpl::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: AMDGPUInstCombineIntrinsic.cpp:351
fmed3AMDGCN
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
Definition: AMDGPUInstCombineIntrinsic.cpp:42
llvm::IRBuilderBase::CreateFMulFMF
Value * CreateFMulFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1470
llvm::CmpInst::isFPPredicate
bool isFPPredicate() const
Definition: InstrTypes.h:826
llvm::PatternMatch::m_ZExtOrSExt
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:1658
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::CmpInst::FIRST_FCMP_PREDICATE
@ FIRST_FCMP_PREDICATE
Definition: InstrTypes.h:737
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
GCNSubtarget.h
llvm::APFloatBase::IEEEhalf
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:164
llvm::SIInstrFlags::N_INFINITY
@ N_INFINITY
Definition: SIDefines.h:134
llvm::User
Definition: User.h:44
Intr
unsigned Intr
Definition: AMDGPUBaseInfo.cpp:2276
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::AMDGPU::getMIMGOffsetMappingInfo
const LLVM_READONLY MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
llvm::IRBuilderBase::getIntNTy
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:514
llvm::CmpInst::FIRST_ICMP_PREDICATE
@ FIRST_ICMP_PREDICATE
Definition: InstrTypes.h:750
llvm::APFloat::isNaN
bool isNaN() const
Definition: APFloat.h:1212
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1639
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::APFloat::isNegative
bool isNegative() const
Definition: APFloat.h:1214
llvm::Instruction
Definition: Instruction.h:42
llvm::CallBase::addFnAttr
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
Definition: InstrTypes.h:1506
llvm::InstCombiner::eraseInstFromFunction
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
llvm::IRBuilderBase::getContext
LLVMContext & getContext() const
Definition: IRBuilder.h:175
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1769
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:919
llvm::AMDGPU::getMIMGLZMappingInfo
const LLVM_READONLY MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
llvm::APInt::countTrailingZeros
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1555
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:684
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::None
const NoneType None
Definition: None.h:24
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:513
simplifyAMDGCNMemoryIntrinsicDemanded
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
Definition: AMDGPUInstCombineIntrinsic.cpp:1078
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1306
AMDGPUTargetTransformInfo.h
llvm::SIInstrFlags::Q_NAN
@ Q_NAN
Definition: SIDefines.h:133
llvm::IRBuilderBase::CreateZExt
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1860
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:191
llvm::APFloat::getQNaN
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition: APFloat.h:915
llvm::SIInstrFlags::N_NORMAL
@ N_NORMAL
Definition: SIDefines.h:135
llvm::APFloat
Definition: APFloat.h:700
llvm::ConstantExpr::getCompare
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2423
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:535
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:567
llvm::IRBuilderBase::CreateFAddFMF
Value * CreateFAddFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1420
llvm::APFloat::isNormal
bool isNormal() const
Definition: APFloat.h:1218
llvm::SIInstrFlags::S_NAN
@ S_NAN
Definition: SIDefines.h:132
llvm::InstCombiner::getDataLayout
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:371
llvm::APFloatBase::cmpResult
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition: APFloat.h:179
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:577
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:88
llvm::APFloatBase::cmpUnordered
@ cmpUnordered
Definition: APFloat.h:183
llvm::PatternMatch::m_AllOnes
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:445
I
#define I(x, y, z)
Definition: MD5.cpp:58
convertTo16Bit
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
Definition: AMDGPUInstCombineIntrinsic.cpp:101
llvm::AMDGPU::getImageDimIntrinsicByBaseOpcode
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
llvm::IRBuilderBase::CreateAdd
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1201
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:499
simplifyAMDGCNImageIntrinsic
static Optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
Definition: AMDGPUInstCombineIntrinsic.cpp:152
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MDNode
Metadata node.
Definition: Metadata.h:926
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::AMDGPU::getMIMGBaseOpcodeInfo
const LLVM_READONLY MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
llvm::AMDGPU::getMIMGMIPMappingInfo
const LLVM_READONLY MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
llvm::PatternMatch::m_SExt
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:1633
llvm::scalbn
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Definition: APFloat.h:1261
llvm::APFloat::isSignaling
bool isSignaling() const
Definition: APFloat.h:1216
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::PatternMatch::m_FiniteNonZero
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
Definition: PatternMatch.h:662
llvm::AMDGPU::getMIMGBiasMappingInfo
const LLVM_READONLY MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
llvm::IRBuilderBase::InsertPointGuard
Definition: IRBuilder.h:350
llvm::Instruction::copyFastMathFlags
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction, which must be an operator which supports these flags.
Definition: Instruction.cpp:244
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1389
llvm::isKnownNeverInfinity
bool isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector value has no infinities.
Definition: ValueTracking.cpp:3640
llvm::isKnownNeverNaN
bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has no NaN elements.
Definition: ValueTracking.cpp:3704
llvm::CallBase::setArgOperand
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1346
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:874
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
Definition: Constants.h:142
llvm::InstCombiner::replaceInstUsesWith
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:417
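A sketch of the usual combiner idiom (foldToFirstArg is hypothetical): return the instruction handed back by replaceInstUsesWith so the driver erases the original:

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
using namespace llvm;

// Replaces every use of II with its first operand and reports the
// change back to the InstCombine driver.
static Instruction *foldToFirstArg(InstCombiner &IC, IntrinsicInst &II) {
  return IC.replaceInstUsesWith(II, II.getArgOperand(0));
}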
llvm::SIInstrFlags::P_SUBNORMAL
@ P_SUBNORMAL
Definition: SIDefines.h:139
llvm::SIInstrFlags::P_INFINITY
@ P_INFINITY
Definition: SIDefines.h:141
llvm::PatternMatch::m_FPExt
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Definition: PatternMatch.h:1697
AMDGPUInstrInfo.h
llvm::APFloatBase::rmTowardZero
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:193
llvm::SIInstrFlags::P_ZERO
@ P_ZERO
Definition: SIDefines.h:138
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:148
canSafelyConvertTo16Bit
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
Definition: AMDGPUInstCombineIntrinsic.cpp:63
llvm::Type::getHalfTy
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:224
llvm::MetadataAsValue::get
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:102
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in the specified type.
Definition: Constants.cpp:966
llvm::fltSemantics
Definition: APFloat.cpp:54
llvm::CmpInst::isSigned
bool isSigned() const
Definition: InstrTypes.h:947
llvm::AMDGPU::getImageDimIntrinsicInfo
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:151
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:522
llvm::frexp
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition: APFloat.h:1273
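For illustration, a minimal use of frexp mirroring the C library contract; fractionOf is a made-up name:

#include "llvm/ADT/APFloat.h"
using namespace llvm;

// Splits X into a fraction and a power-of-two exponent such that
// fraction * 2^Exp == X for finite, non-zero X.
static APFloat fractionOf(const APFloat &X, int &Exp) {
  return frexp(X, Exp, APFloat::rmNearestTiesToEven);
}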
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::CallBase::setCalledOperand
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1432
llvm::PatternMatch::m_AnyZeroFP
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
Definition: PatternMatch.h:674
llvm::IRBuilderBase::CreateMinNum
CallInst * CreateMinNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minnum intrinsic.
Definition: IRBuilder.h:885
llvm::IRBuilderBase::CreateSExt
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1864
llvm::SIInstrFlags::N_ZERO
@ N_ZERO
Definition: SIDefines.h:137
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:189
llvm::APFloat::convert
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4836
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:780
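A short sketch of the capture-then-require pattern; isSelfFSub is a hypothetical helper:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// m_Value(X) captures the LHS, then m_Specific(X) only matches if the
// RHS is that same SSA value: i.e. "fsub X, X".
static bool isSelfFSub(Value *V) {
  Value *X;
  return match(V, m_FSub(m_Value(X), m_Specific(X)));
}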
llvm::IRBuilderBase::CreateLShr
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1310
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::IRBuilderBase::CreateFCmpUNO
Value * CreateFCmpUNO(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2141
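A sketch of the standard is-NaN test built from an unordered compare; emitIsNaN is a hypothetical name:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// "fcmp uno X, X" is true exactly when X is a NaN, since the unordered
// relation holds iff at least one operand is NaN.
static Value *emitIsNaN(IRBuilder<> &B, Value *X) {
  return B.CreateFCmpUNO(X, X, "isnan");
}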
llvm::IRBuilderBase::CreateShl
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1289
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::APInt::getActiveBits
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1447
llvm::SmallVectorImpl< Value * >
llvm::SIInstrFlags::N_SUBNORMAL
@ N_SUBNORMAL
Definition: SIDefines.h:136
llvm::CmpInst::LAST_FCMP_PREDICATE
@ LAST_FCMP_PREDICATE
Definition: InstrTypes.h:738
llvm::Type::getInt16Ty
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:238
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
llvm::APFloat::compare
cmpResult compare(const APFloat &RHS) const
Definition: APFloat.h:1169
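An illustrative use of compare() and the cmpResult relations; both operands are assumed to share the same float semantics:

#include "llvm/ADT/APFloat.h"
using namespace llvm;

// compare() never folds NaNs away: any NaN operand yields cmpUnordered
// rather than cmpLessThan, cmpEqual, or cmpGreaterThan.
static bool isUnordered(const APFloat &A, const APFloat &B) {
  return A.compare(B) == APFloat::cmpUnordered;
}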
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:378
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::GCNTTIImpl::canSimplifyLegacyMulToMul
bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1, InstCombiner &IC) const
Definition: AMDGPUInstCombineIntrinsic.cpp:329
llvm::GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Definition: AMDGPUInstCombineIntrinsic.cpp:1216
llvm::APFloatBase::cmpEqual
@ cmpEqual
Definition: APFloat.h:181
llvm::SIInstrFlags::P_NORMAL
@ P_NORMAL
Definition: SIDefines.h:140
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::IRBuilderBase::CreateCall
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2229
llvm::CallBase::args
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1332
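A small sketch of the args() range in a rebuild-the-call pattern; copyCallArgs is hypothetical:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Snapshot the operand list before mutating or re-creating the call.
static SmallVector<Value *, 8> copyCallArgs(CallBase &CB) {
  return SmallVector<Value *, 8>(CB.args());
}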
llvm::AMDGPU::MIMGBaseOpcodeInfo::Sampler
bool Sampler
Definition: AMDGPUBaseInfo.h:305
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::CmpInst::LAST_ICMP_PREDICATE
@ LAST_ICMP_PREDICATE
Definition: InstrTypes.h:751