// Extracted from the LLVM 16.0.0git doxygen rendering of
// PPCTargetTransformInfo.cpp; page navigation chrome removed.
1 //===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
14 #include "llvm/CodeGen/CostTable.h"
17 #include "llvm/IR/IntrinsicsPowerPC.h"
18 #include "llvm/IR/ProfDataUtils.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/KnownBits.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "ppctti"
28 
29 static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
30 cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
31 
32 static cl::opt<bool>
33 EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
34  cl::desc("Enable using coldcc calling conv for cold "
35  "internal functions"));
36 
37 static cl::opt<bool>
38 LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false),
39  cl::desc("Do not add instruction count to lsr cost model"));
40 
41 // The latency of mtctr is only justified if there are more than 4
42 // comparisons that will be removed as a result.
43 static cl::opt<unsigned>
44 SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
45  cl::desc("Loops with a constant trip count smaller than "
46  "this value will not use the count register."));
47 
48 //===----------------------------------------------------------------------===//
49 //
50 // PPC cost model.
51 //
52 //===----------------------------------------------------------------------===//
53 
55 PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
56  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
57  if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64)
58  return ST->hasPOPCNTD() == PPCSubtarget::POPCNTD_Slow ?
60  return TTI::PSK_Software;
61 }
62 
65  Intrinsic::ID IID = II.getIntrinsicID();
66  switch (IID) {
67  default:
68  break;
69  case Intrinsic::ppc_altivec_lvx:
70  case Intrinsic::ppc_altivec_lvxl:
71  // Turn PPC lvx -> load if the pointer is known aligned.
73  II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
74  &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
77  return new LoadInst(II.getType(), Ptr, "", false, Align(16));
78  }
79  break;
80  case Intrinsic::ppc_vsx_lxvw4x:
81  case Intrinsic::ppc_vsx_lxvd2x: {
82  // Turn PPC VSX loads into normal loads.
85  return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
86  }
87  case Intrinsic::ppc_altivec_stvx:
88  case Intrinsic::ppc_altivec_stvxl:
89  // Turn stvx -> store if the pointer is known aligned.
91  II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
92  &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
93  Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
94  Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
95  return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
96  }
97  break;
98  case Intrinsic::ppc_vsx_stxvw4x:
99  case Intrinsic::ppc_vsx_stxvd2x: {
100  // Turn PPC VSX stores into normal stores.
101  Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
102  Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
103  return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
104  }
105  case Intrinsic::ppc_altivec_vperm:
106  // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
107  // Note that ppc_altivec_vperm has a big-endian bias, so when creating
108  // a vectorshuffle for little endian, we must undo the transformation
109  // performed on vec_perm in altivec.h. That is, we must complement
110  // the permutation mask with respect to 31 and reverse the order of
111  // V1 and V2.
112  if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
113  assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
114  "Bad type for intrinsic!");
115 
116  // Check that all of the elements are integer constants or undefs.
117  bool AllEltsOk = true;
118  for (unsigned i = 0; i != 16; ++i) {
119  Constant *Elt = Mask->getAggregateElement(i);
120  if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
121  AllEltsOk = false;
122  break;
123  }
124  }
125 
126  if (AllEltsOk) {
127  // Cast the input vectors to byte vectors.
128  Value *Op0 =
129  IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());
130  Value *Op1 =
131  IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());
132  Value *Result = UndefValue::get(Op0->getType());
133 
134  // Only extract each element once.
135  Value *ExtractedElts[32];
136  memset(ExtractedElts, 0, sizeof(ExtractedElts));
137 
138  for (unsigned i = 0; i != 16; ++i) {
139  if (isa<UndefValue>(Mask->getAggregateElement(i)))
140  continue;
141  unsigned Idx =
142  cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
143  Idx &= 31; // Match the hardware behavior.
144  if (DL.isLittleEndian())
145  Idx = 31 - Idx;
146 
147  if (!ExtractedElts[Idx]) {
148  Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
149  Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
150  ExtractedElts[Idx] = IC.Builder.CreateExtractElement(
151  Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));
152  }
153 
154  // Insert this value into the result vector.
155  Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],
156  IC.Builder.getInt32(i));
157  }
158  return CastInst::Create(Instruction::BitCast, Result, II.getType());
159  }
160  }
161  break;
162  }
163  return None;
164 }
165 
169  return BaseT::getIntImmCost(Imm, Ty, CostKind);
170 
171  assert(Ty->isIntegerTy());
172 
173  unsigned BitSize = Ty->getPrimitiveSizeInBits();
174  if (BitSize == 0)
175  return ~0U;
176 
177  if (Imm == 0)
178  return TTI::TCC_Free;
179 
180  if (Imm.getBitWidth() <= 64) {
181  if (isInt<16>(Imm.getSExtValue()))
182  return TTI::TCC_Basic;
183 
184  if (isInt<32>(Imm.getSExtValue())) {
185  // A constant that can be materialized using lis.
186  if ((Imm.getZExtValue() & 0xFFFF) == 0)
187  return TTI::TCC_Basic;
188 
189  return 2 * TTI::TCC_Basic;
190  }
191  }
192 
193  return 4 * TTI::TCC_Basic;
194 }
195 
197  const APInt &Imm, Type *Ty,
200  return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
201 
202  assert(Ty->isIntegerTy());
203 
204  unsigned BitSize = Ty->getPrimitiveSizeInBits();
205  if (BitSize == 0)
206  return ~0U;
207 
208  switch (IID) {
209  default:
210  return TTI::TCC_Free;
211  case Intrinsic::sadd_with_overflow:
212  case Intrinsic::uadd_with_overflow:
213  case Intrinsic::ssub_with_overflow:
214  case Intrinsic::usub_with_overflow:
215  if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
216  return TTI::TCC_Free;
217  break;
218  case Intrinsic::experimental_stackmap:
219  if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
220  return TTI::TCC_Free;
221  break;
222  case Intrinsic::experimental_patchpoint_void:
223  case Intrinsic::experimental_patchpoint_i64:
224  if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
225  return TTI::TCC_Free;
226  break;
227  }
229 }
230 
231 InstructionCost PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
232  const APInt &Imm, Type *Ty,
234  Instruction *Inst) {
236  return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);
237 
238  assert(Ty->isIntegerTy());
239 
240  unsigned BitSize = Ty->getPrimitiveSizeInBits();
241  if (BitSize == 0)
242  return ~0U;
243 
244  unsigned ImmIdx = ~0U;
245  bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
246  ZeroFree = false;
247  switch (Opcode) {
248  default:
249  return TTI::TCC_Free;
250  case Instruction::GetElementPtr:
251  // Always hoist the base address of a GetElementPtr. This prevents the
252  // creation of new constants for every base constant that gets constant
253  // folded with the offset.
254  if (Idx == 0)
255  return 2 * TTI::TCC_Basic;
256  return TTI::TCC_Free;
257  case Instruction::And:
258  RunFree = true; // (for the rotate-and-mask instructions)
259  [[fallthrough]];
260  case Instruction::Add:
261  case Instruction::Or:
262  case Instruction::Xor:
263  ShiftedFree = true;
264  [[fallthrough]];
265  case Instruction::Sub:
266  case Instruction::Mul:
267  case Instruction::Shl:
268  case Instruction::LShr:
269  case Instruction::AShr:
270  ImmIdx = 1;
271  break;
272  case Instruction::ICmp:
273  UnsignedFree = true;
274  ImmIdx = 1;
275  // Zero comparisons can use record-form instructions.
276  [[fallthrough]];
277  case Instruction::Select:
278  ZeroFree = true;
279  break;
280  case Instruction::PHI:
281  case Instruction::Call:
282  case Instruction::Ret:
283  case Instruction::Load:
284  case Instruction::Store:
285  break;
286  }
287 
288  if (ZeroFree && Imm == 0)
289  return TTI::TCC_Free;
290 
291  if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
292  if (isInt<16>(Imm.getSExtValue()))
293  return TTI::TCC_Free;
294 
295  if (RunFree) {
296  if (Imm.getBitWidth() <= 32 &&
297  (isShiftedMask_32(Imm.getZExtValue()) ||
298  isShiftedMask_32(~Imm.getZExtValue())))
299  return TTI::TCC_Free;
300 
301  if (ST->isPPC64() &&
302  (isShiftedMask_64(Imm.getZExtValue()) ||
303  isShiftedMask_64(~Imm.getZExtValue())))
304  return TTI::TCC_Free;
305  }
306 
307  if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
308  return TTI::TCC_Free;
309 
310  if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
311  return TTI::TCC_Free;
312  }
313 
315 }
316 
317 // Check if the current Type is an MMA vector type. Valid MMA types are
318 // v256i1 and v512i1 respectively.
319 static bool isMMAType(Type *Ty) {
320  return Ty->isVectorTy() && (Ty->getScalarSizeInBits() == 1) &&
321  (Ty->getPrimitiveSizeInBits() > 128);
322 }
323 
327  // We already implement getCastInstrCost and getMemoryOpCost where we perform
328  // the vector adjustment there.
329  if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
331 
332  if (U->getType()->isVectorTy()) {
333  // Instructions that need to be split should cost more.
334  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(U->getType());
335  return LT.first * BaseT::getInstructionCost(U, Operands, CostKind);
336  }
337 
339 }
340 
341 // Determining the address of a TLS variable results in a function call in
342 // certain TLS models.
343 static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
345  // No need to traverse again if we already checked this operand.
346  if (!Visited.insert(MemAddr).second)
347  return false;
348  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
349  if (!GV) {
350  // Recurse to check for constants that refer to TLS global variables.
351  if (const auto *CV = dyn_cast<Constant>(MemAddr))
352  for (const auto &CO : CV->operands())
353  if (memAddrUsesCTR(CO, TM, Visited))
354  return true;
355  return false;
356  }
357 
358  if (!GV->isThreadLocal())
359  return false;
360  TLSModel::Model Model = TM.getTLSModel(GV);
362 }
363 
364 bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
366  const PPCTargetMachine &TM = ST->getTargetMachine();
367 
368  // Loop through the inline asm constraints and look for something that
369  // clobbers ctr.
370  auto asmClobbersCTR = [](InlineAsm *IA) {
371  InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
372  for (const InlineAsm::ConstraintInfo &C : CIV) {
373  if (C.Type != InlineAsm::isInput)
374  for (const auto &Code : C.Codes)
375  if (StringRef(Code).equals_insensitive("{ctr}"))
376  return true;
377  }
378  return false;
379  };
380 
381  auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
382  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
383  return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
384 
385  return false;
386  };
387 
388  auto supportedHalfPrecisionOp = [](Instruction *Inst) {
389  switch (Inst->getOpcode()) {
390  default:
391  return false;
392  case Instruction::FPTrunc:
393  case Instruction::FPExt:
394  case Instruction::Load:
395  case Instruction::Store:
396  case Instruction::FPToUI:
397  case Instruction::UIToFP:
398  case Instruction::FPToSI:
399  case Instruction::SIToFP:
400  return true;
401  }
402  };
403 
404  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
405  J != JE; ++J) {
406  // There are no direct operations on half precision so assume that
407  // anything with that type requires a call except for a few select
408  // operations with Power9.
409  if (Instruction *CurrInst = dyn_cast<Instruction>(J)) {
410  for (const auto &Op : CurrInst->operands()) {
411  if (Op->getType()->getScalarType()->isHalfTy() ||
412  CurrInst->getType()->getScalarType()->isHalfTy())
413  return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst));
414  }
415  }
416  if (CallInst *CI = dyn_cast<CallInst>(J)) {
417  // Inline ASM is okay, unless it clobbers the ctr register.
418  if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
419  if (asmClobbersCTR(IA))
420  return true;
421  continue;
422  }
423 
424  if (Function *F = CI->getCalledFunction()) {
425  // Most intrinsics don't become function calls, but some might.
426  // sin, cos, exp and log are always calls.
427  unsigned Opcode = 0;
428  if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
429  switch (F->getIntrinsicID()) {
430  default: continue;
431  // If we have a call to loop_decrement or set_loop_iterations,
432  // we're definitely using CTR.
433  case Intrinsic::set_loop_iterations:
434  case Intrinsic::loop_decrement:
435  return true;
436 
437  // Binary operations on 128-bit value will use CTR.
438  case Intrinsic::experimental_constrained_fadd:
439  case Intrinsic::experimental_constrained_fsub:
440  case Intrinsic::experimental_constrained_fmul:
441  case Intrinsic::experimental_constrained_fdiv:
442  case Intrinsic::experimental_constrained_frem:
443  if (F->getType()->getScalarType()->isFP128Ty() ||
444  F->getType()->getScalarType()->isPPC_FP128Ty())
445  return true;
446  break;
447 
448  case Intrinsic::experimental_constrained_fptosi:
449  case Intrinsic::experimental_constrained_fptoui:
450  case Intrinsic::experimental_constrained_sitofp:
451  case Intrinsic::experimental_constrained_uitofp: {
452  Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType();
453  Type *DstType = CI->getType()->getScalarType();
454  if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() ||
455  isLargeIntegerTy(!TM.isPPC64(), SrcType) ||
456  isLargeIntegerTy(!TM.isPPC64(), DstType))
457  return true;
458  break;
459  }
460 
461  // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
462  // because, although it does clobber the counter register, the
463  // control can't then return to inside the loop unless there is also
464  // an eh_sjlj_setjmp.
465  case Intrinsic::eh_sjlj_setjmp:
466 
467  case Intrinsic::memcpy:
468  case Intrinsic::memmove:
469  case Intrinsic::memset:
470  case Intrinsic::powi:
471  case Intrinsic::log:
472  case Intrinsic::log2:
473  case Intrinsic::log10:
474  case Intrinsic::exp:
475  case Intrinsic::exp2:
476  case Intrinsic::pow:
477  case Intrinsic::sin:
478  case Intrinsic::cos:
479  case Intrinsic::experimental_constrained_powi:
480  case Intrinsic::experimental_constrained_log:
481  case Intrinsic::experimental_constrained_log2:
482  case Intrinsic::experimental_constrained_log10:
483  case Intrinsic::experimental_constrained_exp:
484  case Intrinsic::experimental_constrained_exp2:
485  case Intrinsic::experimental_constrained_pow:
486  case Intrinsic::experimental_constrained_sin:
487  case Intrinsic::experimental_constrained_cos:
488  return true;
489  case Intrinsic::copysign:
490  if (CI->getArgOperand(0)->getType()->getScalarType()->
491  isPPC_FP128Ty())
492  return true;
493  else
494  continue; // ISD::FCOPYSIGN is never a library call.
495  case Intrinsic::fmuladd:
496  case Intrinsic::fma: Opcode = ISD::FMA; break;
497  case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
498  case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
499  case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
500  case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
501  case Intrinsic::rint: Opcode = ISD::FRINT; break;
502  case Intrinsic::lrint: Opcode = ISD::LRINT; break;
503  case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
504  case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
505  case Intrinsic::round: Opcode = ISD::FROUND; break;
506  case Intrinsic::lround: Opcode = ISD::LROUND; break;
507  case Intrinsic::llround: Opcode = ISD::LLROUND; break;
508  case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
509  case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
510  case Intrinsic::experimental_constrained_fcmp:
511  Opcode = ISD::STRICT_FSETCC;
512  break;
513  case Intrinsic::experimental_constrained_fcmps:
514  Opcode = ISD::STRICT_FSETCCS;
515  break;
516  case Intrinsic::experimental_constrained_fma:
517  Opcode = ISD::STRICT_FMA;
518  break;
519  case Intrinsic::experimental_constrained_sqrt:
520  Opcode = ISD::STRICT_FSQRT;
521  break;
522  case Intrinsic::experimental_constrained_floor:
523  Opcode = ISD::STRICT_FFLOOR;
524  break;
525  case Intrinsic::experimental_constrained_ceil:
526  Opcode = ISD::STRICT_FCEIL;
527  break;
528  case Intrinsic::experimental_constrained_trunc:
529  Opcode = ISD::STRICT_FTRUNC;
530  break;
531  case Intrinsic::experimental_constrained_rint:
532  Opcode = ISD::STRICT_FRINT;
533  break;
534  case Intrinsic::experimental_constrained_lrint:
535  Opcode = ISD::STRICT_LRINT;
536  break;
537  case Intrinsic::experimental_constrained_llrint:
538  Opcode = ISD::STRICT_LLRINT;
539  break;
540  case Intrinsic::experimental_constrained_nearbyint:
541  Opcode = ISD::STRICT_FNEARBYINT;
542  break;
543  case Intrinsic::experimental_constrained_round:
544  Opcode = ISD::STRICT_FROUND;
545  break;
546  case Intrinsic::experimental_constrained_lround:
547  Opcode = ISD::STRICT_LROUND;
548  break;
549  case Intrinsic::experimental_constrained_llround:
550  Opcode = ISD::STRICT_LLROUND;
551  break;
552  case Intrinsic::experimental_constrained_minnum:
553  Opcode = ISD::STRICT_FMINNUM;
554  break;
555  case Intrinsic::experimental_constrained_maxnum:
556  Opcode = ISD::STRICT_FMAXNUM;
557  break;
558  case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
559  case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
560  }
561  }
562 
563  // PowerPC does not use [US]DIVREM or other library calls for
564  // operations on regular types which are not otherwise library calls
565  // (i.e. soft float or atomics). If adapting for targets that do,
566  // additional care is required here.
567 
568  LibFunc Func;
569  if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
570  LibInfo->getLibFunc(F->getName(), Func) &&
571  LibInfo->hasOptimizedCodeGen(Func)) {
572  // Non-read-only functions are never treated as intrinsics.
573  if (!CI->onlyReadsMemory())
574  return true;
575 
576  // Conversion happens only for FP calls.
577  if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
578  return true;
579 
580  switch (Func) {
581  default: return true;
582  case LibFunc_copysign:
583  case LibFunc_copysignf:
584  continue; // ISD::FCOPYSIGN is never a library call.
585  case LibFunc_copysignl:
586  return true;
587  case LibFunc_fabs:
588  case LibFunc_fabsf:
589  case LibFunc_fabsl:
590  continue; // ISD::FABS is never a library call.
591  case LibFunc_sqrt:
592  case LibFunc_sqrtf:
593  case LibFunc_sqrtl:
594  Opcode = ISD::FSQRT; break;
595  case LibFunc_floor:
596  case LibFunc_floorf:
597  case LibFunc_floorl:
598  Opcode = ISD::FFLOOR; break;
599  case LibFunc_nearbyint:
600  case LibFunc_nearbyintf:
601  case LibFunc_nearbyintl:
602  Opcode = ISD::FNEARBYINT; break;
603  case LibFunc_ceil:
604  case LibFunc_ceilf:
605  case LibFunc_ceill:
606  Opcode = ISD::FCEIL; break;
607  case LibFunc_rint:
608  case LibFunc_rintf:
609  case LibFunc_rintl:
610  Opcode = ISD::FRINT; break;
611  case LibFunc_round:
612  case LibFunc_roundf:
613  case LibFunc_roundl:
614  Opcode = ISD::FROUND; break;
615  case LibFunc_trunc:
616  case LibFunc_truncf:
617  case LibFunc_truncl:
618  Opcode = ISD::FTRUNC; break;
619  case LibFunc_fmin:
620  case LibFunc_fminf:
621  case LibFunc_fminl:
622  Opcode = ISD::FMINNUM; break;
623  case LibFunc_fmax:
624  case LibFunc_fmaxf:
625  case LibFunc_fmaxl:
626  Opcode = ISD::FMAXNUM; break;
627  }
628  }
629 
630  if (Opcode) {
631  EVT EVTy =
632  TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true);
633 
634  if (EVTy == MVT::Other)
635  return true;
636 
637  if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
638  continue;
639  else if (EVTy.isVector() &&
640  TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
641  continue;
642 
643  return true;
644  }
645  }
646 
647  return true;
648  } else if ((J->getType()->getScalarType()->isFP128Ty() ||
649  J->getType()->getScalarType()->isPPC_FP128Ty())) {
650  // Most operations on f128 or ppc_f128 values become calls.
651  return true;
652  } else if (isa<FCmpInst>(J) &&
653  J->getOperand(0)->getType()->getScalarType()->isFP128Ty()) {
654  return true;
655  } else if ((isa<FPTruncInst>(J) || isa<FPExtInst>(J)) &&
656  (cast<CastInst>(J)->getSrcTy()->getScalarType()->isFP128Ty() ||
657  cast<CastInst>(J)->getDestTy()->getScalarType()->isFP128Ty())) {
658  return true;
659  } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
660  isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
661  CastInst *CI = cast<CastInst>(J);
662  if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
663  CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
664  isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
665  isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
666  return true;
667  } else if (isLargeIntegerTy(!TM.isPPC64(),
668  J->getType()->getScalarType()) &&
669  (J->getOpcode() == Instruction::UDiv ||
670  J->getOpcode() == Instruction::SDiv ||
671  J->getOpcode() == Instruction::URem ||
672  J->getOpcode() == Instruction::SRem)) {
673  return true;
674  } else if (!TM.isPPC64() &&
675  isLargeIntegerTy(false, J->getType()->getScalarType()) &&
676  (J->getOpcode() == Instruction::Shl ||
677  J->getOpcode() == Instruction::AShr ||
678  J->getOpcode() == Instruction::LShr)) {
679  // Only on PPC32, for 128-bit integers (specifically not 64-bit
680  // integers), these might be runtime calls.
681  return true;
682  } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
683  // On PowerPC, indirect jumps use the counter register.
684  return true;
685  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
686  if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
687  return true;
688  }
689 
690  // FREM is always a call.
691  if (J->getOpcode() == Instruction::FRem)
692  return true;
693 
694  if (ST->useSoftFloat()) {
695  switch(J->getOpcode()) {
696  case Instruction::FAdd:
697  case Instruction::FSub:
698  case Instruction::FMul:
699  case Instruction::FDiv:
700  case Instruction::FPTrunc:
701  case Instruction::FPExt:
702  case Instruction::FPToUI:
703  case Instruction::FPToSI:
704  case Instruction::UIToFP:
705  case Instruction::SIToFP:
706  case Instruction::FCmp:
707  return true;
708  }
709  }
710 
711  for (Value *Operand : J->operands())
712  if (memAddrUsesCTR(Operand, TM, Visited))
713  return true;
714  }
715 
716  return false;
717 }
718 
720  AssumptionCache &AC,
721  TargetLibraryInfo *LibInfo,
722  HardwareLoopInfo &HWLoopInfo) {
723  const PPCTargetMachine &TM = ST->getTargetMachine();
724  TargetSchedModel SchedModel;
725  SchedModel.init(ST);
726 
727  // Do not convert small short loops to CTR loop.
728  unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
729  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
731  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
733  for (BasicBlock *BB : L->blocks())
734  Metrics.analyzeBasicBlock(BB, *this, EphValues);
735  // 6 is an approximate latency for the mtctr instruction.
736  if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
737  return false;
738  }
739 
740  // We don't want to spill/restore the counter register, and so we don't
741  // want to use the counter register if the loop contains calls.
743  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
744  I != IE; ++I)
745  if (mightUseCTR(*I, LibInfo, Visited))
746  return false;
747 
748  SmallVector<BasicBlock*, 4> ExitingBlocks;
749  L->getExitingBlocks(ExitingBlocks);
750 
751  // If there is an exit edge known to be frequently taken,
752  // we should not transform this loop.
753  for (auto &BB : ExitingBlocks) {
754  Instruction *TI = BB->getTerminator();
755  if (!TI) continue;
756 
757  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
758  uint64_t TrueWeight = 0, FalseWeight = 0;
759  if (!BI->isConditional() ||
760  !extractBranchWeights(*BI, TrueWeight, FalseWeight))
761  continue;
762 
763  // If the exit path is more frequent than the loop path,
764  // we return here without further analysis for this loop.
765  bool TrueIsExit = !L->contains(BI->getSuccessor(0));
766  if (( TrueIsExit && FalseWeight < TrueWeight) ||
767  (!TrueIsExit && FalseWeight > TrueWeight))
768  return false;
769  }
770  }
771 
772  // If an exit block has a PHI that accesses a TLS variable as one of the
773  // incoming values from the loop, we cannot produce a CTR loop because the
774  // address for that value will be computed in the loop.
775  SmallVector<BasicBlock *, 4> ExitBlocks;
776  L->getExitBlocks(ExitBlocks);
777  for (auto &BB : ExitBlocks) {
778  for (auto &PHI : BB->phis()) {
779  for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
780  Idx++) {
781  const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
782  const Value *IncomingValue = PHI.getIncomingValue(Idx);
783  if (L->contains(IncomingBB) &&
784  memAddrUsesCTR(IncomingValue, TM, Visited))
785  return false;
786  }
787  }
788  }
789 
790  LLVMContext &C = L->getHeader()->getContext();
791  HWLoopInfo.CountType = TM.isPPC64() ?
793  HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
794  return true;
795 }
796 
800  if (ST->getCPUDirective() == PPC::DIR_A2) {
801  // The A2 is in-order with a deep pipeline, and concatenation unrolling
802  // helps expose latency-hiding opportunities to the instruction scheduler.
803  UP.Partial = UP.Runtime = true;
804 
805  // We unroll a lot on the A2 (hundreds of instructions), and the benefits
806  // often outweigh the cost of a division to compute the trip count.
807  UP.AllowExpensiveTripCount = true;
808  }
809 
810  BaseT::getUnrollingPreferences(L, SE, UP, ORE);
811 }
812 
815  BaseT::getPeelingPreferences(L, SE, PP);
816 }
817 // This function returns true to allow using coldcc calling convention.
818 // Returning true results in coldcc being used for functions which are cold at
819 // all call sites when the callers of the functions are not calling any other
820 // non coldcc functions.
822  return EnablePPCColdCC;
823 }
824 
825 bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
826  // On the A2, always unroll aggressively.
827  if (ST->getCPUDirective() == PPC::DIR_A2)
828  return true;
829 
830  return LoopHasReductions;
831 }
832 
834 PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
836  Options.LoadSizes = {8, 4, 2, 1};
837  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
838  return Options;
839 }
840 
842  return true;
843 }
844 
845 unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
846  assert(ClassID == GPRRC || ClassID == FPRRC ||
847  ClassID == VRRC || ClassID == VSXRC);
848  if (ST->hasVSX()) {
849  assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC);
850  return ClassID == VSXRC ? 64 : 32;
851  }
852  assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
853  return 32;
854 }
855 
856 unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
857  if (Vector)
858  return ST->hasVSX() ? VSXRC : VRRC;
859  else if (Ty && (Ty->getScalarType()->isFloatTy() ||
860  Ty->getScalarType()->isDoubleTy()))
861  return ST->hasVSX() ? VSXRC : FPRRC;
862  else if (Ty && (Ty->getScalarType()->isFP128Ty() ||
863  Ty->getScalarType()->isPPC_FP128Ty()))
864  return VRRC;
865  else if (Ty && Ty->getScalarType()->isHalfTy())
866  return VSXRC;
867  else
868  return GPRRC;
869 }
870 
871 const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
872 
873  switch (ClassID) {
874  default:
875  llvm_unreachable("unknown register class");
876  return "PPC::unknown register class";
877  case GPRRC: return "PPC::GPRRC";
878  case FPRRC: return "PPC::FPRRC";
879  case VRRC: return "PPC::VRRC";
880  case VSXRC: return "PPC::VSXRC";
881  }
882 }
883 
884 TypeSize
886  switch (K) {
888  return TypeSize::getFixed(ST->isPPC64() ? 64 : 32);
890  return TypeSize::getFixed(ST->hasAltivec() ? 128 : 0);
892  return TypeSize::getScalable(0);
893  }
894 
895  llvm_unreachable("Unsupported register kind");
896 }
897 
899  // Starting with P7 we have a cache line size of 128.
900  unsigned Directive = ST->getCPUDirective();
901  // Assume that Future CPU has the same cache line size as the others.
905  return 128;
906 
907  // On other processors return a default of 64 bytes.
908  return 64;
909 }
910 
912  return 300;
913 }
914 
915 unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
916  unsigned Directive = ST->getCPUDirective();
917  // The 440 has no SIMD support, but floating-point instructions
918  // have a 5-cycle latency, so unroll by 5x for latency hiding.
919  if (Directive == PPC::DIR_440)
920  return 5;
921 
922  // The A2 has no SIMD support, but floating-point instructions
923  // have a 6-cycle latency, so unroll by 6x for latency hiding.
924  if (Directive == PPC::DIR_A2)
925  return 6;
926 
927  // FIXME: For lack of any better information, do no harm...
929  return 1;
930 
931  // For P7 and P8, floating-point instructions have a 6-cycle latency and
932  // there are two execution units, so unroll by 12x for latency hiding.
933  // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
934  // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready
935  // Assume that future is the same as the others.
939  return 12;
940 
941  // For most things, modern systems have two execution units (and
942  // out-of-order execution).
943  return 2;
944 }
945 
946 // Returns a cost adjustment factor to adjust the cost of vector instructions
947 // on targets which there is overlap between the vector and scalar units,
948 // thereby reducing the overall throughput of vector code wrt. scalar code.
949 // An invalid instruction cost is returned if the type is an MMA vector type.
951  Type *Ty1, Type *Ty2) {
952  // If the vector type is of an MMA type (v256i1, v512i1), an invalid
953  // instruction cost is returned. This is to signify to other cost computing
954  // functions to return the maximum instruction cost in order to prevent any
955  // opportunities for the optimizer to produce MMA types within the IR.
956  if (isMMAType(Ty1))
958 
959  if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
960  return InstructionCost(1);
961 
962  std::pair<InstructionCost, MVT> LT1 = getTypeLegalizationCost(Ty1);
963  // If type legalization involves splitting the vector, we don't want to
964  // double the cost at every step - only the last step.
965  if (LT1.first != 1 || !LT1.second.isVector())
966  return InstructionCost(1);
967 
968  int ISD = TLI->InstructionOpcodeToISD(Opcode);
969  if (TLI->isOperationExpand(ISD, LT1.second))
970  return InstructionCost(1);
971 
972  if (Ty2) {
973  std::pair<InstructionCost, MVT> LT2 = getTypeLegalizationCost(Ty2);
974  if (LT2.first != 1 || !LT2.second.isVector())
975  return InstructionCost(1);
976  }
977 
978  return InstructionCost(2);
979 }
980 
982  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
985  const Instruction *CxtI) {
986  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
987 
988  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Ty, nullptr);
989  if (!CostFactor.isValid())
990  return InstructionCost::getMax();
991 
992  // TODO: Handle more cost kinds.
994  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
995  Op2Info, Args, CxtI);
996 
997  // Fallback to the default implementation.
999  Opcode, Ty, CostKind, Op1Info, Op2Info);
1000  return Cost * CostFactor;
1001 }
1002 
1006  int Index, Type *SubTp,
1008 
1009  InstructionCost CostFactor =
1010  vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);
1011  if (!CostFactor.isValid())
1012  return InstructionCost::getMax();
1013 
1014  // Legalize the type.
1015  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
1016 
1017  // PPC, for both Altivec/VSX, support cheap arbitrary permutations
1018  // (at least in the sense that there need only be one non-loop-invariant
1019  // instruction). We need one such shuffle instruction for each actual
1020  // register (this is not true for arbitrary shuffles, but is true for the
1021  // structured types of shuffles covered by TTI::ShuffleKind).
1022  return LT.first * CostFactor;
1023 }
1024 
1027  const Instruction *I) {
1029  return Opcode == Instruction::PHI ? 0 : 1;
1030  // Branches are assumed to be predicted.
1031  return 0;
1032 }
1033 
1035  Type *Src,
1038  const Instruction *I) {
1039  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
1040 
1041  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Dst, Src);
1042  if (!CostFactor.isValid())
1043  return InstructionCost::getMax();
1044 
1046  BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
1047  Cost *= CostFactor;
1048  // TODO: Allow non-throughput costs that aren't binary.
1050  return Cost == 0 ? 0 : 1;
1051  return Cost;
1052 }
1053 
1055  Type *CondTy,
1056  CmpInst::Predicate VecPred,
1058  const Instruction *I) {
1059  InstructionCost CostFactor =
1060  vectorCostAdjustmentFactor(Opcode, ValTy, nullptr);
1061  if (!CostFactor.isValid())
1062  return InstructionCost::getMax();
1063 
1065  BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
1066  // TODO: Handle other cost kinds.
1068  return Cost;
1069  return Cost * CostFactor;
1070 }
1071 
1073  unsigned Index) {
1074  assert(Val->isVectorTy() && "This must be a vector type");
1075 
1076  int ISD = TLI->InstructionOpcodeToISD(Opcode);
1077  assert(ISD && "Invalid opcode");
1078 
1079  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Val, nullptr);
1080  if (!CostFactor.isValid())
1081  return InstructionCost::getMax();
1082 
1084  Cost *= CostFactor;
1085 
1086  if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
1087  // Double-precision scalars are already located in index #0 (or #1 if LE).
1088  if (ISD == ISD::EXTRACT_VECTOR_ELT &&
1089  Index == (ST->isLittleEndian() ? 1 : 0))
1090  return 0;
1091 
1092  return Cost;
1093 
1094  } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
1095  if (ST->hasP9Altivec()) {
1096  if (ISD == ISD::INSERT_VECTOR_ELT)
1097  // A move-to VSR and a permute/insert. Assume vector operation cost
1098  // for both (cost will be 2x on P9).
1099  return 2 * CostFactor;
1100 
1101  // It's an extract. Maybe we can do a cheap move-from VSR.
1102  unsigned EltSize = Val->getScalarSizeInBits();
1103  if (EltSize == 64) {
1104  unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
1105  if (Index == MfvsrdIndex)
1106  return 1;
1107  } else if (EltSize == 32) {
1108  unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
1109  if (Index == MfvsrwzIndex)
1110  return 1;
1111  }
1112 
1113  // We need a vector extract (or mfvsrld). Assume vector operation cost.
1114  // The cost of the load constant for a vector extract is disregarded
1115  // (invariant, easily schedulable).
1116  return CostFactor;
1117 
1118  } else if (ST->hasDirectMove())
1119  // Assume permute has standard cost.
1120  // Assume move-to/move-from VSR have 2x standard cost.
1121  return 3;
1122  }
1123 
1124  // Estimated cost of a load-hit-store delay. This was obtained
1125  // experimentally as a minimum needed to prevent unprofitable
1126  // vectorization for the paq8p benchmark. It may need to be
1127  // raised further if other unprofitable cases remain.
1128  unsigned LHSPenalty = 2;
1129  if (ISD == ISD::INSERT_VECTOR_ELT)
1130  LHSPenalty += 7;
1131 
1132  // Vector element insert/extract with Altivec is very expensive,
1133  // because they require store and reload with the attendant
1134  // processor stall for load-hit-store. Until VSX is available,
1135  // these need to be estimated as very costly.
1136  if (ISD == ISD::EXTRACT_VECTOR_ELT ||
1137  ISD == ISD::INSERT_VECTOR_ELT)
1138  return LHSPenalty + Cost;
1139 
1140  return Cost;
1141 }
1142 
1144  MaybeAlign Alignment,
1145  unsigned AddressSpace,
1147  TTI::OperandValueInfo OpInfo,
1148  const Instruction *I) {
1149 
1150  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Src, nullptr);
1151  if (!CostFactor.isValid())
1152  return InstructionCost::getMax();
1153 
1154  if (TLI->getValueType(DL, Src, true) == MVT::Other)
1155  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
1156  CostKind);
1157  // Legalize the type.
1158  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
1159  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
1160  "Invalid Opcode");
1161 
1163  BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
1164  // TODO: Handle other cost kinds.
1166  return Cost;
1167 
1168  Cost *= CostFactor;
1169 
1170  bool IsAltivecType = ST->hasAltivec() &&
1171  (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
1172  LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
1173  bool IsVSXType = ST->hasVSX() &&
1174  (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
1175 
1176  // VSX has 32b/64b load instructions. Legalization can handle loading of
1177  // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
1178  // PPCTargetLowering can't compute the cost appropriately. So here we
1179  // explicitly check this case.
1180  unsigned MemBytes = Src->getPrimitiveSizeInBits();
1181  if (Opcode == Instruction::Load && ST->hasVSX() && IsAltivecType &&
1182  (MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32)))
1183  return 1;
1184 
1185  // Aligned loads and stores are easy.
1186  unsigned SrcBytes = LT.second.getStoreSize();
1187  if (!SrcBytes || !Alignment || *Alignment >= SrcBytes)
1188  return Cost;
1189 
1190  // If we can use the permutation-based load sequence, then this is also
1191  // relatively cheap (not counting loop-invariant instructions): one load plus
1192  // one permute (the last load in a series has extra cost, but we're
1193  // neglecting that here). Note that on the P7, we could do unaligned loads
1194  // for Altivec types using the VSX instructions, but that's more expensive
1195  // than using the permutation-based load sequence. On the P8, that's no
1196  // longer true.
1197  if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
1198  *Alignment >= LT.second.getScalarType().getStoreSize())
1199  return Cost + LT.first; // Add the cost of the permutations.
1200 
1201  // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
1202  // P7, unaligned vector loads are more expensive than the permutation-based
1203  // load sequence, so that might be used instead, but regardless, the net cost
1204  // is about the same (not counting loop-invariant instructions).
1205  if (IsVSXType || (ST->hasVSX() && IsAltivecType))
1206  return Cost;
1207 
1208  // Newer PPC supports unaligned memory access.
1209  if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))
1210  return Cost;
1211 
1212  // PPC in general does not support unaligned loads and stores. They'll need
1213  // to be decomposed based on the alignment factor.
1214 
1215  // Add the cost of each scalar load or store.
1216  assert(Alignment);
1217  Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);
1218 
1219  // For a vector type, there is also scalarization overhead (only for
1220  // stores, loads are expanded using the vector-load + permutation sequence,
1221  // which is much less expensive).
1222  if (Src->isVectorTy() && Opcode == Instruction::Store)
1223  for (int i = 0, e = cast<FixedVectorType>(Src)->getNumElements(); i < e;
1224  ++i)
1225  Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
1226 
1227  return Cost;
1228 }
1229 
1231  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1232  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1233  bool UseMaskForCond, bool UseMaskForGaps) {
1234  InstructionCost CostFactor =
1235  vectorCostAdjustmentFactor(Opcode, VecTy, nullptr);
1236  if (!CostFactor.isValid())
1237  return InstructionCost::getMax();
1238 
1239  if (UseMaskForCond || UseMaskForGaps)
1240  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1241  Alignment, AddressSpace, CostKind,
1242  UseMaskForCond, UseMaskForGaps);
1243 
1244  assert(isa<VectorType>(VecTy) &&
1245  "Expect a vector type for interleaved memory op");
1246 
1247  // Legalize the type.
1248  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VecTy);
1249 
1250  // Firstly, the cost of load/store operation.
1251  InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
1253 
1254  // PPC, for both Altivec/VSX, support cheap arbitrary permutations
1255  // (at least in the sense that there need only be one non-loop-invariant
1256  // instruction). For each result vector, we need one shuffle per incoming
1257  // vector (except that the first shuffle can take two incoming vectors
1258  // because it does not need to take itself).
1259  Cost += Factor*(LT.first-1);
1260 
1261  return Cost;
1262 }
1263 
1268 }
1269 
1271  const Function *Callee,
1272  const ArrayRef<Type *> &Types) const {
1273 
1274  // We need to ensure that argument promotion does not
1275  // attempt to promote pointers to MMA types (__vector_pair
1276  // and __vector_quad) since these types explicitly cannot be
1277  // passed as arguments. Both of these types are larger than
1278  // the 128-bit Altivec vectors and have a scalar size of 1 bit.
1279  if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
1280  return false;
1281 
1282  return llvm::none_of(Types, [](Type *Ty) {
1283  if (Ty->isSized())
1284  return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128;
1285  return false;
1286  });
1287 }
1288 
1290  LoopInfo *LI, DominatorTree *DT,
1291  AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
1292  // Process nested loops first.
1293  for (Loop *I : *L)
1294  if (canSaveCmp(I, BI, SE, LI, DT, AC, LibInfo))
1295  return false; // Stop search.
1296 
1297  HardwareLoopInfo HWLoopInfo(L);
1298 
1299  if (!HWLoopInfo.canAnalyze(*LI))
1300  return false;
1301 
1302  if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
1303  return false;
1304 
1305  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
1306  return false;
1307 
1308  *BI = HWLoopInfo.ExitBranch;
1309  return true;
1310 }
1311 
1313  const TargetTransformInfo::LSRCost &C2) {
1314  // PowerPC default behaviour here is "instruction number 1st priority".
1315  // If LsrNoInsnsCost is set, call default implementation.
1316  if (!LsrNoInsnsCost)
1317  return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
1318  C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
1319  std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
1320  C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
1321  else
1323 }
1324 
1326  return false;
1327 }
1328 
1330  const PPCTargetMachine &TM = ST->getTargetMachine();
1331  // XCOFF hasn't implemented lowerRelativeReference, disable non-ELF for now.
1332  if (!TM.isELFv2ABI())
1333  return false;
1335 }
1336 
1339  switch (Inst->getIntrinsicID()) {
1340  case Intrinsic::ppc_altivec_lvx:
1341  case Intrinsic::ppc_altivec_lvxl:
1342  case Intrinsic::ppc_altivec_lvebx:
1343  case Intrinsic::ppc_altivec_lvehx:
1344  case Intrinsic::ppc_altivec_lvewx:
1345  case Intrinsic::ppc_vsx_lxvd2x:
1346  case Intrinsic::ppc_vsx_lxvw4x:
1347  case Intrinsic::ppc_vsx_lxvd2x_be:
1348  case Intrinsic::ppc_vsx_lxvw4x_be:
1349  case Intrinsic::ppc_vsx_lxvl:
1350  case Intrinsic::ppc_vsx_lxvll:
1351  case Intrinsic::ppc_vsx_lxvp: {
1352  Info.PtrVal = Inst->getArgOperand(0);
1353  Info.ReadMem = true;
1354  Info.WriteMem = false;
1355  return true;
1356  }
1357  case Intrinsic::ppc_altivec_stvx:
1358  case Intrinsic::ppc_altivec_stvxl:
1359  case Intrinsic::ppc_altivec_stvebx:
1360  case Intrinsic::ppc_altivec_stvehx:
1361  case Intrinsic::ppc_altivec_stvewx:
1362  case Intrinsic::ppc_vsx_stxvd2x:
1363  case Intrinsic::ppc_vsx_stxvw4x:
1364  case Intrinsic::ppc_vsx_stxvd2x_be:
1365  case Intrinsic::ppc_vsx_stxvw4x_be:
1366  case Intrinsic::ppc_vsx_stxvl:
1367  case Intrinsic::ppc_vsx_stxvll:
1368  case Intrinsic::ppc_vsx_stxvp: {
1369  Info.PtrVal = Inst->getArgOperand(1);
1370  Info.ReadMem = false;
1371  Info.WriteMem = true;
1372  return true;
1373  }
1374  case Intrinsic::ppc_stbcx:
1375  case Intrinsic::ppc_sthcx:
1376  case Intrinsic::ppc_stdcx:
1377  case Intrinsic::ppc_stwcx: {
1378  Info.PtrVal = Inst->getArgOperand(0);
1379  Info.ReadMem = false;
1380  Info.WriteMem = true;
1381  return true;
1382  }
1383  default:
1384  break;
1385  }
1386 
1387  return false;
1388 }
1389 
1391  Align Alignment) const {
1392  // Only load and stores instructions can have variable vector length on Power.
1393  if (Opcode != Instruction::Load && Opcode != Instruction::Store)
1394  return false;
1395  // Loads/stores with length instructions use bits 0-7 of the GPR operand and
1396  // therefore cannot be used in 32-bit mode.
1397  if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
1398  return false;
1399  if (isa<FixedVectorType>(DataType)) {
1400  unsigned VecWidth = DataType->getPrimitiveSizeInBits();
1401  return VecWidth == 128;
1402  }
1403  Type *ScalarTy = DataType->getScalarType();
1404 
1405  if (ScalarTy->isPointerTy())
1406  return true;
1407 
1408  if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
1409  return true;
1410 
1411  if (!ScalarTy->isIntegerTy())
1412  return false;
1413 
1414  unsigned IntWidth = ScalarTy->getIntegerBitWidth();
1415  return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
1416 }
1417 
1419  Align Alignment,
1420  unsigned AddressSpace,
1422  const Instruction *I) {
1423  InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
1424  AddressSpace, CostKind, I);
1425  if (TLI->getValueType(DL, Src, true) == MVT::Other)
1426  return Cost;
1427  // TODO: Handle other cost kinds.
1429  return Cost;
1430 
1431  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
1432  "Invalid Opcode");
1433 
1434  auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
1435  assert(SrcVTy && "Expected a vector type for VP memory operations");
1436 
1437  if (hasActiveVectorLength(Opcode, Src, Alignment)) {
1438  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcVTy);
1439 
1440  InstructionCost CostFactor =
1441  vectorCostAdjustmentFactor(Opcode, Src, nullptr);
1442  if (!CostFactor.isValid())
1443  return InstructionCost::getMax();
1444 
1445  InstructionCost Cost = LT.first * CostFactor;
1446  assert(Cost.isValid() && "Expected valid cost");
1447 
1448  // On P9 but not on P10, if the op is misaligned then it will cause a
1449  // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked
1450  // ones.
1451  const Align DesiredAlignment(16);
1452  if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9)
1453  return Cost;
1454 
1455  // Since alignment may be under estimated, we try to compute the probability
1456  // that the actual address is aligned to the desired boundary. For example
1457  // an 8-byte aligned load is assumed to be actually 16-byte aligned half the
1458  // time, while a 4-byte aligned load has a 25% chance of being 16-byte
1459  // aligned.
1460  float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value();
1461  float MisalignmentProb = 1.0 - AlignmentProb;
1462  return (MisalignmentProb * P9PipelineFlushEstimate) +
1463  (AlignmentProb * *Cost.getValue());
1464  }
1465 
1466  // Usually we should not get to this point, but the following is an attempt to
1467  // model the cost of legalization. Currently we can only lower intrinsics with
1468  // evl but no mask, on Power 9/10. Otherwise, we must scalarize.
1469  return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
1470 }
1471 
1473  // Subtargets using PC-Relative addressing supported.
1474  if (ST->isUsingPCRelativeCalls())
1475  return true;
1476 
1477  const Function *Callee = CB->getCalledFunction();
1478  // Indirect calls and variadic argument functions not supported.
1479  if (!Callee || Callee->isVarArg())
1480  return false;
1481 
1482  const Function *Caller = CB->getCaller();
1483  // Support if we can share TOC base.
1484  return ST->getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(),
1485  Callee);
1486 }
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:30
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:586
llvm::PPCTTIImpl::VSXRC
@ VSXRC
Definition: PPCTargetTransformInfo.h:95
llvm::ISD::STRICT_FSETCC
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:475
llvm::Type::isSized
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:283
llvm::BasicTTIImplBase< PPCTTIImpl >::DL
const DataLayout & DL
Definition: TargetTransformInfoImpl.h:37
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:474
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:218
llvm::PPCSubtarget::hasPOPCNTD
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:361
llvm::PPCTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: PPCTargetTransformInfo.cpp:797
llvm::TargetTransformInfo::LSRCost::NumRegs
unsigned NumRegs
Definition: TargetTransformInfo.h:407
llvm::BasicTTIImplBase< PPCTTIImpl >::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Definition: BasicTTIImpl.h:473
llvm::ISD::UMULO
@ UMULO
Definition: ISDOpcodes.h:332
llvm::ISD::STRICT_FSQRT
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::PPC::DIR_440
@ DIR_440
Definition: PPCSubtarget.h:43
llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1748
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:64
llvm::PPCTTIImpl::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: PPCTargetTransformInfo.cpp:915
llvm::PPCTTIImpl::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:166
llvm::HardwareLoopInfo::LoopDecrement
Value * LoopDecrement
Definition: TargetTransformInfo.h:105
InstCombiner.h
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::InstCombiner::getDominatorTree
DominatorTree & getDominatorTree() const
Definition: InstCombiner.h:370
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::InlineAsm::ConstraintInfoVector
std::vector< ConstraintInfo > ConstraintInfoVector
Definition: InlineAsm.h:120
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:237
llvm::TargetTransformInfoImplBase::isLSRCostLess
bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
Definition: TargetTransformInfoImpl.h:216
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:943
llvm::ISD::STRICT_FMAXNUM
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:423
llvm::PPCTTIImpl::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:196
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
llvm::PPCTTIImpl::isLSRCostLess
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
Definition: PPCTargetTransformInfo.cpp:1312
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:586
llvm::ISD::STRICT_FMINNUM
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:424
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
llvm::TargetTransformInfoImplCRTPBase< PPCTTIImpl >::getInstructionCost
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfoImpl.h:1015
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:328
llvm::TLSModel::GeneralDynamic
@ GeneralDynamic
Definition: CodeGen.h:43
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::InstCombiner::Builder
BuilderTy & Builder
Definition: InstCombiner.h:58
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:31
llvm::PPCTTIImpl::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1034
llvm::PPCSubtarget::hasP8Vector
bool hasP8Vector() const
Definition: PPCSubtarget.h:283
llvm::PPCSubtarget::isLittleEndian
bool isLittleEndian() const
Definition: PPCSubtarget.h:263
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:3340
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:965
llvm::PPCSubtarget::hasVSX
bool hasVSX() const
Definition: PPCSubtarget.h:281
llvm::PPC::DIR_E5500
@ DIR_E5500
Definition: PPCSubtarget.h:53
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
DisablePPCConstHoist
static cl::opt< bool > DisablePPCConstHoist("disable-ppc-constant-hoisting", cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden)
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:102
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::TargetTransformInfo::LSRCost::NumIVMuls
unsigned NumIVMuls
Definition: TargetTransformInfo.h:409
llvm::isShiftedMask_32
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:446
llvm::PPCSubtarget::hasP9Vector
bool hasP9Vector() const
Definition: PPCSubtarget.h:286
llvm::PPC::DIR_PWR7
@ DIR_PWR7
Definition: PPCSubtarget.h:60
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:105
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:470
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:529
llvm::getOrEnforceKnownAlignment
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1417
llvm::PPCTTIImpl::supportsTailCallFor
bool supportsTailCallFor(const CallBase *CB) const
Definition: PPCTargetTransformInfo.cpp:1472
llvm::Optional
Definition: APInt.h:33
Vector
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
llvm::PPCTTIImpl::vectorCostAdjustmentFactor
InstructionCost vectorCostAdjustmentFactor(unsigned Opcode, Type *Ty1, Type *Ty2)
Definition: PPCTargetTransformInfo.cpp:950
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::PPCTTIImpl::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
Definition: PPCTargetTransformInfo.cpp:719
llvm::PPCSubtarget::getTargetMachine
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:228
llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2328
llvm::PPCTTIImpl::getRegisterClassName
const char * getRegisterClassName(unsigned ClassID) const
Definition: PPCTargetTransformInfo.cpp:871
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
llvm::TargetTransformInfoImplBase::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:392
llvm::PPC::DIR_PWR9
@ DIR_PWR9
Definition: PPCSubtarget.h:62
llvm::PPCSubtarget::vectorsUseTwoUnits
bool vectorsUseTwoUnits() const
Definition: PPCSubtarget.h:306
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:458
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:190
llvm::TargetTransformInfo::OperandValueInfo
Definition: TargetTransformInfo.h:924
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition: TargetTransformInfo.h:479
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::PPCTTIImpl::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, Type *SubTp, ArrayRef< const Value * > Args=None)
Definition: PPCTargetTransformInfo.cpp:1003
llvm::TargetTransformInfo::LSRCost::Insns
unsigned Insns
TODO: Some of these could be merged.
Definition: TargetTransformInfo.h:406
llvm::PPCTTIImpl::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR()
Definition: PPCTargetTransformInfo.cpp:1325
llvm::CastInst::getDestTy
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:684
F
#define F(x, y, z)
Definition: MD5.cpp:55
KnownBits.h
llvm::LoopBase::block_end
block_iterator block_end() const
Definition: LoopInfo.h:194
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::TargetTransformInfo::LSRCost::AddRecCost
unsigned AddRecCost
Definition: TargetTransformInfo.h:408
floor
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
Definition: README-FPStack.txt:54
llvm::ISD::STRICT_FROUND
@ STRICT_FROUND
Definition: ISDOpcodes.h:427
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
llvm::TargetSchedModel::init
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
Definition: TargetSchedule.cpp:47
llvm::PPC::DIR_A2
@ DIR_A2
Definition: PPCSubtarget.h:50
llvm::PPCTTIImpl::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: PPCTargetTransformInfo.cpp:64
llvm::TargetTransformInfo::LSRCost::SetupCost
unsigned SetupCost
Definition: TargetTransformInfo.h:412
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:930
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::PPCTTIImpl::GPRRC
@ GPRRC
Definition: PPCTargetTransformInfo.h:95
CommandLine.h
CodeMetrics.h
TargetLowering.h
llvm::PPCTTIImpl::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1143
llvm::BasicTTIImplBase< PPCTTIImpl >::getTypeLegalizationCost
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: BasicTTIImpl.h:789
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1184
llvm::BasicTTIImplBase< PPCTTIImpl >::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: BasicTTIImpl.h:524
llvm::TargetSchedModel::getIssueWidth
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
Definition: TargetSchedule.h:98
llvm::ISD::STRICT_FRINT
@ STRICT_FRINT
Definition: ISDOpcodes.h:421
llvm::BasicTTIImplBase< PPCTTIImpl >::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:1187
InlinePriorityMode::Cost
@ Cost
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:887
llvm::PPCTTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: PPCTargetTransformInfo.cpp:845
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1139
llvm::User
Definition: User.h:44
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:36
llvm::ISD::STRICT_FNEARBYINT
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:422
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
LsrNoInsnsCost
static cl::opt< bool > LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false), cl::desc("Do not add instruction count to lsr cost model"))
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:56
llvm::isShiftedMask_64
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:452
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1397
SI
@ SI
Definition: SIInstrInfo.cpp:7966
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:928
llvm::PPCTargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
Definition: PPCISelLowering.cpp:16871
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
llvm::ISD::LLROUND
@ LLROUND
Definition: ISDOpcodes.h:932
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:246
llvm::PPCSubtarget::isISA3_0
bool isISA3_0() const
Definition: PPCSubtarget.h:340
llvm::InlineAsm::isInput
@ isInput
Definition: InlineAsm.h:95
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
llvm::BasicTTIImplBase< PPCTTIImpl >::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:825
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:298
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::TargetTransformInfoImplBase::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
Definition: TargetTransformInfoImpl.h:397
llvm::PPCSubtarget::isPPC64
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
Definition: PPCSubtarget.cpp:256
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:927
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:926
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1713
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::PPCTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: PPCTargetTransformInfo.cpp:813
llvm::AArch64PACKey::IA
@ IA
Definition: AArch64BaseInfo.h:819
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::PPCTTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: PPCTargetTransformInfo.cpp:1072
llvm::ISD::SMULO
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:331
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:965
llvm::PPCTTIImpl::FPRRC
@ FPRRC
Definition: PPCTargetTransformInfo.h:95
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:155
llvm::CastInst::getSrcTy
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:682
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::BasicTTIImplBase< PPCTTIImpl >::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1137
llvm::BasicTTIImplBase< PPCTTIImpl >::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:969
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:120
llvm::CallBase::getCaller
Function * getCaller()
Helper to get the caller (the parent function).
Definition: Instructions.cpp:284
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:586
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1322
llvm::PPCTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: PPCTargetTransformInfo.cpp:885
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:74
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::LoopBase::block_begin
block_iterator block_begin() const
Definition: LoopInfo.h:193
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:210
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:8016
llvm::InlineAsm
Definition: InlineAsm.h:33
llvm::ISD::STRICT_FSETCCS
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:476
llvm::cl::opt< bool >
llvm::LoopBase< BasicBlock, Loop >::block_iterator
ArrayRef< BasicBlock * >::const_iterator block_iterator
Definition: LoopInfo.h:192
llvm::TargetLoweringBase::getMinimumJumpTableEntries
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
Definition: TargetLoweringBase.cpp:1996
llvm::PPC::DIR_PWR10
@ DIR_PWR10
Definition: PPCSubtarget.h:63
llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1985
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:298
llvm::PPC::DIR_E500mc
@ DIR_E500mc
Definition: PPCSubtarget.h:52
llvm::PPCTTIImpl::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:1265
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
TargetSchedule.h
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:89
llvm::PPCSubtarget::POPCNTD_Unavailable
@ POPCNTD_Unavailable
Definition: PPCSubtarget.h:74
llvm::PointerType::getUnqual
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:651
llvm::InstCombiner::getAssumptionCache
AssumptionCache & getAssumptionCache() const
Definition: InstCombiner.h:368
llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:30
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:131
uint64_t
llvm::TLSModel::LocalDynamic
@ LocalDynamic
Definition: CodeGen.h:44
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:403
llvm::InstCombiner::getDataLayout
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:371
llvm::PPCSubtarget::hasP9Altivec
bool hasP9Altivec() const
Definition: PPCSubtarget.h:287
llvm::PPCTTIImpl::getPopcntSupport
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
Definition: PPCTargetTransformInfo.cpp:55
llvm::IRBuilderBase::getInt32
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:469
llvm::ISD::STRICT_LRINT
@ STRICT_LRINT
Definition: ISDOpcodes.h:432
llvm::TargetLoweringBase::getMaxExpandSizeMemcmp
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
Definition: TargetLowering.h:1706
llvm::DataLayout::isLittleEndian
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:244
ProfDataUtils.h
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::PPCTTIImpl::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
Definition: PPCTargetTransformInfo.cpp:1337
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:417
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::ISD::LRINT
@ LRINT
Definition: ISDOpcodes.h:933
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:170
llvm::InlineAsm::ConstraintInfo
Definition: InlineAsm.h:122
llvm::ISD::STRICT_LROUND
@ STRICT_LROUND
Definition: ISDOpcodes.h:430
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::PPCTTIImpl::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: PPCTargetTransformInfo.cpp:1230
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
llvm::PPCTTIImpl::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Definition: PPCTargetTransformInfo.cpp:1329
llvm::PPCTTIImpl::enableInterleavedAccessVectorization
bool enableInterleavedAccessVectorization()
Definition: PPCTargetTransformInfo.cpp:841
llvm::IRBuilderBase::CreateExtractElement
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2316
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::TargetTransformInfo::LSRCost::ScaleCost
unsigned ScaleCost
Definition: TargetTransformInfo.h:413
llvm::BasicTTIImplBase< PPCTTIImpl >::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:1291
llvm::BasicTTIImplBase< PPCTTIImpl >::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1231
llvm::TargetTransformInfo::PSK_SlowHardware
@ PSK_SlowHardware
Definition: TargetTransformInfo.h:586
SmallCTRLoopThreshold
static cl::opt< unsigned > SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register."))
llvm::ISD::STRICT_LLRINT
@ STRICT_LLRINT
Definition: ISDOpcodes.h:433
llvm::PPCTTIImpl::useColdCCForColdCall
bool useColdCCForColdCall(Function &F)
Definition: PPCTargetTransformInfo.cpp:821
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is.
Definition: TargetTransformInfo.h:781
llvm::PPCSubtarget::hasDirectMove
bool hasDirectMove() const
Definition: PPCSubtarget.h:319
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:244
llvm::TargetTransformInfoImplBase::getVPMemoryOpCost
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition: TargetTransformInfoImpl.h:611
llvm::PPCTTIImpl::getInstructionCost
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:324
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:154
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
llvm::ISD::STRICT_FTRUNC
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:429
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::PPCTTIImpl::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
Definition: PPCTargetTransformInfo.cpp:231
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::ISD::LLRINT
@ LLRINT
Definition: ISDOpcodes.h:934
EnablePPCColdCC
static cl::opt< bool > EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false), cl::desc("Enable using coldcc calling conv for cold " "internal functions"))
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
trunc
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g trunc
Definition: README-FPStack.txt:63
llvm::PPCTTIImpl::canSaveCmp
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)
Definition: PPCTargetTransformInfo.cpp:1289
llvm::TargetLoweringBase::InstructionOpcodeToISD
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Definition: TargetLoweringBase.cpp:1785
llvm::TargetMachine::shouldAssumeDSOLocal
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
Definition: TargetMachine.cpp:88
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:430
llvm::PPCTTIImpl::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: PPCTargetTransformInfo.cpp:856
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:80
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:112
llvm::BasicTTIImplBase< PPCTTIImpl >::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:596
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::PPCTTIImpl::getVPMemoryOpCost
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1418
llvm::PPCTTIImpl::enableAggressiveInterleaving
bool enableAggressiveInterleaving(bool LoopHasReductions)
Definition: PPCTargetTransformInfo.cpp:825
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:187
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:913
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:164
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1311
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:405
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:944
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
llvm::TargetTransformInfo::LSRCost::NumBaseAdds
unsigned NumBaseAdds
Definition: TargetTransformInfo.h:410
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
llvm::None
constexpr std::nullopt_t None
Definition: None.h:27
llvm::PPCSubtarget::useSoftFloat
bool useSoftFloat() const
Definition: PPCSubtarget.h:247
llvm::TargetTransformInfoImplBase::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:404
llvm::InstructionCost::getMax
static InstructionCost getMax()
Definition: InstructionCost.h:72
CostTable.h
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:295
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:924
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:216
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:101
llvm::TypeSize
Definition: TypeSize.h:435
llvm::ISD::STRICT_FCEIL
@ STRICT_FCEIL
Definition: ISDOpcodes.h:425
llvm::BasicTTIImplBase< PPCTTIImpl >::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:1275
llvm::TargetLoweringBase::isOperationExpand
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
Definition: TargetLowering.h:1287
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::LinearPolySize< TypeSize >::getScalable
static TypeSize getScalable(ScalarTy MinVal)
Definition: TypeSize.h:286
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:226
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
llvm::PPCTTIImpl::hasActiveVectorLength
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
Definition: PPCTargetTransformInfo.cpp:1390
powi
This is blocked on not handling X *X *X powi(X, 3)(see note above). The issue is that we end up getting t
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:794
llvm::Type::isPPC_FP128Ty
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
Definition: Type.h:165
llvm::TargetTransformInfo::LSRCost::ImmCost
unsigned ImmCost
Definition: TargetTransformInfo.h:411
llvm::TLSModel::Model
Model
Definition: CodeGen.h:42
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::PPCTargetMachine
Common code between 32-bit and 64-bit PowerPC targets.
Definition: PPCTargetMachine.h:25
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
PPCTargetTransformInfo.h
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:965
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:97
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:74
llvm::ISD::STRICT_FFLOOR
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:426
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::PPCTTIImpl::getPrefetchDistance
unsigned getPrefetchDistance() const override
Definition: PPCTargetTransformInfo.cpp:911
llvm::PPCTTIImpl::enableMemCmpExpansion
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: PPCTargetTransformInfo.cpp:834
memAddrUsesCTR
static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM, SmallPtrSetImpl< const Value * > &Visited)
Definition: PPCTargetTransformInfo.cpp:343
llvm::PPCSubtarget::isUsingPCRelativeCalls
bool isUsingPCRelativeCalls() const
Definition: PPCSubtarget.cpp:258
llvm::StringRef::equals_insensitive
bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
Definition: StringRef.h:170
llvm::PPCSubtarget::hasAltivec
bool hasAltivec() const
Definition: PPCSubtarget.h:277
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:965
isMMAType
static bool isMMAType(Type *Ty)
Definition: PPCTargetTransformInfo.cpp:319
llvm::PPCTTIImpl::areTypesABICompatible
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
Definition: PPCTargetTransformInfo.cpp:1270
llvm::PPC::DIR_PWR8
@ DIR_PWR8
Definition: PPCSubtarget.h:61
DataType
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1342
llvm::BasicTTIImplBase< PPCTTIImpl >::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:1435
llvm::PPCTTIImpl::getCacheLineSize
unsigned getCacheLineSize() const override
Definition: PPCTargetTransformInfo.cpp:898
TargetTransformInfo.h
llvm::PPCTTIImpl::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1054
llvm::ISD::STRICT_LLROUND
@ STRICT_LLROUND
Definition: ISDOpcodes.h:431
llvm::Optional::value
constexpr const T & value() const &
Definition: Optional.h:281
llvm::HardwareLoopInfo::canAnalyze
bool canAnalyze(LoopInfo &LI)
Definition: TargetTransformInfo.cpp:50
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:72
llvm::ISD::LROUND
@ LROUND
Definition: ISDOpcodes.h:931
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1175
llvm::SmallPtrSetImpl< const Value * >
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
llvm::extractBranchWeights
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
Definition: ProfDataUtils.cpp:104
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::PPCSubtarget::getCPUDirective
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:207
llvm::TargetLibraryInfo::hasOptimizedCodeGen
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
Definition: TargetLibraryInfo.h:347
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:245
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3278
llvm::PPC::DIR_PWR_FUTURE
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
BasicTTIImpl.h
llvm::cl::desc
Definition: CommandLine.h:413
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1497
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3134
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:925
llvm::PPCTTIImpl::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1025
llvm::PPCTTIImpl::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: PPCTargetTransformInfo.cpp:981
llvm::PPCSubtarget::POPCNTD_Slow
@ POPCNTD_Slow
Definition: PPCSubtarget.h:75
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:523
llvm::HardwareLoopInfo::CountType
IntegerType * CountType
Definition: TargetTransformInfo.h:104
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:219
llvm::Directive
Definition: DirectiveEmitter.h:100
llvm::Type::isFP128Ty
bool isFP128Ty() const
Return true if this is 'fp128'.
Definition: Type.h:162
Debug.h
llvm::PPCTTIImpl::VRRC
@ VRRC
Definition: PPCTargetTransformInfo.h:95
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:164
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::PPCSubtarget::hasP10Vector
bool hasP10Vector() const
Definition: PPCSubtarget.h:288
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39
llvm::TargetTransformInfoImplBase::areTypesABICompatible
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
Definition: TargetTransformInfoImpl.h:768