//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PPCTargetTransformInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

#define DEBUG_TYPE "ppctti"

static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);

// This is currently only used for the data prefetch pass
static cl::opt<unsigned>
CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
              cl::desc("The loop prefetch cache line size"));

static cl::opt<bool>
EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
                cl::desc("Enable using coldcc calling conv for cold "
                         "internal functions"));

static cl::opt<bool>
LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false),
               cl::desc("Do not add instruction count to lsr cost model"));

// The latency of mtctr is only justified if there are more than 4
// comparisons that will be removed as a result.
static cl::opt<unsigned>
SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
                      cl::desc("Loops with a constant trip count smaller than "
                               "this value will not use the count register."));

//===----------------------------------------------------------------------===//
//
// PPC cost model.
//
//===----------------------------------------------------------------------===//

TargetTransformInfo::PopcntSupportKind
PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64)
    return ST->hasPOPCNTD() == PPCSubtarget::POPCNTD_Slow ?
             TTI::PSK_SlowHardware : TTI::PSK_FastHardware;
  return TTI::PSK_Software;
}

Optional<Instruction *>
PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
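    // Illustrative IR (with a hypothetical %p known to be 16-byte aligned):
    //   %v = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %p)
    // becomes an ordinary aligned load once the pointer is cast below:
    //   %v = load <4 x i32>, <4 x i32>* %p.cast, align 16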
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
      Value *Ptr = IC.Builder.CreateBitCast(
          II.getArgOperand(0), PointerType::getUnqual(II.getType()));
      return new LoadInst(II.getType(), Ptr, "", false, Align(16));
    }
    break;
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x: {
    // Turn PPC VSX loads into normal loads.
    Value *Ptr = IC.Builder.CreateBitCast(
        II.getArgOperand(0), PointerType::getUnqual(II.getType()));
    return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
      Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
      Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
      return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
    }
    break;
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x: {
    // Turn PPC VSX stores into normal stores.
    Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
    Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
    return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
  }
  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    // Note that ppc_altivec_vperm has a big-endian bias, so when creating
    // a vectorshuffle for little endian, we must undo the transformation
    // performed on vec_perm in altivec.h. That is, we must complement
    // the permutation mask with respect to 31 and reverse the order of
    // V1 and V2.
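    // For example, a mask byte of 3 becomes 31 - 3 = 28 on little endian;
    // together with the V1/V2 swap below, the shuffle selects the same byte
    // of the same source that the hardware vperm would have produced.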
    if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
      assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
             "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        Constant *Elt = Mask->getAggregateElement(i);
        if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 =
            IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());
        Value *Op1 =
            IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getAggregateElement(i)))
            continue;
          unsigned Idx =
              cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
          Idx &= 31; // Match the hardware behavior.
          if (DL.isLittleEndian())
            Idx = 31 - Idx;

          if (!ExtractedElts[Idx]) {
            Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
            Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
            ExtractedElts[Idx] = IC.Builder.CreateExtractElement(
                Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));
          }

          // Insert this value into the result vector.
          Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],
                                                  IC.Builder.getInt32(i));
        }
        return CastInst::Create(Instruction::BitCast, Result, II.getType());
      }
    }
    break;
  }
  return None;
}

int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                              TTI::TargetCostKind CostKind) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCost(Imm, Ty, CostKind);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  if (Imm == 0)
    return TTI::TCC_Free;

  if (Imm.getBitWidth() <= 64) {
    if (isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Basic;

    if (isInt<32>(Imm.getSExtValue())) {
      // A constant that can be materialized using lis.
      if ((Imm.getZExtValue() & 0xFFFF) == 0)
        return TTI::TCC_Basic;

      return 2 * TTI::TCC_Basic;
    }
  }

  return 4 * TTI::TCC_Basic;
}
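// Illustrative examples of the tiers above: 42 fits a sign-extended 16-bit
// immediate (a single li), so it is TCC_Basic; 0x12340000 has a zero low
// halfword and loads with a single lis, also TCC_Basic; 0x12345678 needs
// lis+ori, hence 2 * TCC_Basic; anything wider than 32 bits is assumed to
// take a longer materialization sequence, hence 4 * TCC_Basic.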

int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
    if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}

int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                  const APInt &Imm, Type *Ty,
                                  TTI::TargetCostKind CostKind,
                                  Instruction *Inst) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  unsigned ImmIdx = ~0U;
  bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
       ZeroFree = false;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr. This prevents the
    // creation of new constants for every base constant that gets constant
    // folded with the offset.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::And:
    RunFree = true; // (for the rotate-and-mask instructions)
    LLVM_FALLTHROUGH;
  case Instruction::Add:
  case Instruction::Or:
  case Instruction::Xor:
    ShiftedFree = true;
    LLVM_FALLTHROUGH;
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    ImmIdx = 1;
    break;
  case Instruction::ICmp:
    UnsignedFree = true;
    ImmIdx = 1;
    // Zero comparisons can use record-form instructions.
    LLVM_FALLTHROUGH;
  case Instruction::Select:
    ZeroFree = true;
    break;
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Ret:
  case Instruction::Load:
  case Instruction::Store:
    break;
  }

  if (ZeroFree && Imm == 0)
    return TTI::TCC_Free;

  if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
    if (isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Free;
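    // A "run" below is a contiguous block of ones in the mask (e.g.
    // 0x00FFFF00), or the complement of one (e.g. 0xFF0000FF); these are
    // exactly the masks the rlwinm/rldicl-family rotate-and-mask
    // instructions can encode directly, so such AND immediates are free.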
    if (RunFree) {
      if (Imm.getBitWidth() <= 32 &&
          (isShiftedMask_32(Imm.getZExtValue()) ||
           isShiftedMask_32(~Imm.getZExtValue())))
        return TTI::TCC_Free;

      if (ST->isPPC64() &&
          (isShiftedMask_64(Imm.getZExtValue()) ||
           isShiftedMask_64(~Imm.getZExtValue())))
        return TTI::TCC_Free;
    }

    if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
      return TTI::TCC_Free;

    if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
      return TTI::TCC_Free;
  }

  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}

InstructionCost PPCTTIImpl::getUserCost(const User *U,
                                        ArrayRef<const Value *> Operands,
                                        TTI::TargetCostKind CostKind) {
  // getCastInstrCost and getMemoryOpCost already perform the vector
  // adjustment, so skip it here.
  if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
    return BaseT::getUserCost(U, Operands, CostKind);

  if (U->getType()->isVectorTy()) {
    // Instructions that need to be split should cost more.
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, U->getType());
    return LT.first * BaseT::getUserCost(U, Operands, CostKind);
  }

  return BaseT::getUserCost(U, Operands, CostKind);
}

// Determining the address of a TLS variable results in a function call in
// certain TLS models.
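// For example, in the general-dynamic and local-dynamic models the address
// is computed by a call to __tls_get_addr, and a call clobbers the counter
// register.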
static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
                           SmallPtrSetImpl<const Value *> &Visited) {
  // No need to traverse again if we already checked this operand.
  if (!Visited.insert(MemAddr).second)
    return false;
  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
  if (!GV) {
    // Recurse to check for constants that refer to TLS global variables.
    if (const auto *CV = dyn_cast<Constant>(MemAddr))
      for (const auto &CO : CV->operands())
        if (memAddrUsesCTR(CO, TM, Visited))
          return true;
    return false;
  }

  if (!GV->isThreadLocal())
    return false;
  TLSModel::Model Model = TM.getTLSModel(GV);
  return Model == TLSModel::GeneralDynamic ||
         Model == TLSModel::LocalDynamic;
}

bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
                             SmallPtrSetImpl<const Value *> &Visited) {
  const PPCTargetMachine &TM = ST->getTargetMachine();

  // Loop through the inline asm constraints and look for something that
  // clobbers ctr.
  auto asmClobbersCTR = [](InlineAsm *IA) {
    InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
    for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
      InlineAsm::ConstraintInfo &C = CIV[i];
      if (C.Type != InlineAsm::isInput)
        for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
          if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
            return true;
    }
    return false;
  };

  auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
    if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
      return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);

    return false;
  };

  auto supportedHalfPrecisionOp = [](Instruction *Inst) {
    switch (Inst->getOpcode()) {
    default:
      return false;
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::Load:
    case Instruction::Store:
    case Instruction::FPToUI:
    case Instruction::UIToFP:
    case Instruction::FPToSI:
    case Instruction::SIToFP:
      return true;
    }
  };

  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
    // There are no direct operations on half precision, so assume that
    // anything with that type requires a call except for a few select
    // operations with Power9.
    if (Instruction *CurrInst = dyn_cast<Instruction>(J)) {
      for (const auto &Op : CurrInst->operands()) {
        if (Op->getType()->getScalarType()->isHalfTy() ||
            CurrInst->getType()->getScalarType()->isHalfTy())
          return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst));
      }
    }
    if (CallInst *CI = dyn_cast<CallInst>(J)) {
      // Inline ASM is okay, unless it clobbers the ctr register.
      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
        if (asmClobbersCTR(IA))
          return true;
        continue;
      }

      if (Function *F = CI->getCalledFunction()) {
        // Most intrinsics don't become function calls, but some might.
        // sin, cos, exp and log are always calls.
        unsigned Opcode = 0;
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          default: continue;
          // If we have a call to loop_decrement or set_loop_iterations,
          // we're definitely using CTR.
          case Intrinsic::set_loop_iterations:
          case Intrinsic::loop_decrement:
            return true;

          // Binary operations on 128-bit values will use CTR.
          case Intrinsic::experimental_constrained_fadd:
          case Intrinsic::experimental_constrained_fsub:
          case Intrinsic::experimental_constrained_fmul:
          case Intrinsic::experimental_constrained_fdiv:
          case Intrinsic::experimental_constrained_frem:
            if (F->getType()->getScalarType()->isFP128Ty() ||
                F->getType()->getScalarType()->isPPC_FP128Ty())
              return true;
            break;

          case Intrinsic::experimental_constrained_fptosi:
          case Intrinsic::experimental_constrained_fptoui:
          case Intrinsic::experimental_constrained_sitofp:
          case Intrinsic::experimental_constrained_uitofp: {
            Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType();
            Type *DstType = CI->getType()->getScalarType();
            if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() ||
                isLargeIntegerTy(!TM.isPPC64(), SrcType) ||
                isLargeIntegerTy(!TM.isPPC64(), DstType))
              return true;
            break;
          }

          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
          // because, although it does clobber the counter register, the
          // control can't then return to inside the loop unless there is also
          // an eh_sjlj_setjmp.
          case Intrinsic::eh_sjlj_setjmp:

          case Intrinsic::memcpy:
          case Intrinsic::memmove:
          case Intrinsic::memset:
          case Intrinsic::powi:
          case Intrinsic::log:
          case Intrinsic::log2:
          case Intrinsic::log10:
          case Intrinsic::exp:
          case Intrinsic::exp2:
          case Intrinsic::pow:
          case Intrinsic::sin:
          case Intrinsic::cos:
          case Intrinsic::experimental_constrained_powi:
          case Intrinsic::experimental_constrained_log:
          case Intrinsic::experimental_constrained_log2:
          case Intrinsic::experimental_constrained_log10:
          case Intrinsic::experimental_constrained_exp:
          case Intrinsic::experimental_constrained_exp2:
          case Intrinsic::experimental_constrained_pow:
          case Intrinsic::experimental_constrained_sin:
          case Intrinsic::experimental_constrained_cos:
            return true;
          case Intrinsic::copysign:
            if (CI->getArgOperand(0)->getType()->getScalarType()->
                isPPC_FP128Ty())
              return true;
            else
              continue; // ISD::FCOPYSIGN is never a library call.
          case Intrinsic::fma:                Opcode = ISD::FMA;        break;
          case Intrinsic::sqrt:               Opcode = ISD::FSQRT;      break;
          case Intrinsic::floor:              Opcode = ISD::FFLOOR;     break;
          case Intrinsic::ceil:               Opcode = ISD::FCEIL;      break;
          case Intrinsic::trunc:              Opcode = ISD::FTRUNC;     break;
          case Intrinsic::rint:               Opcode = ISD::FRINT;      break;
          case Intrinsic::lrint:              Opcode = ISD::LRINT;      break;
          case Intrinsic::llrint:             Opcode = ISD::LLRINT;     break;
          case Intrinsic::nearbyint:          Opcode = ISD::FNEARBYINT; break;
          case Intrinsic::round:              Opcode = ISD::FROUND;     break;
          case Intrinsic::lround:             Opcode = ISD::LROUND;     break;
          case Intrinsic::llround:            Opcode = ISD::LLROUND;    break;
          case Intrinsic::minnum:             Opcode = ISD::FMINNUM;    break;
          case Intrinsic::maxnum:             Opcode = ISD::FMAXNUM;    break;
          case Intrinsic::experimental_constrained_fcmp:
            Opcode = ISD::STRICT_FSETCC;
            break;
          case Intrinsic::experimental_constrained_fcmps:
            Opcode = ISD::STRICT_FSETCCS;
            break;
          case Intrinsic::experimental_constrained_fma:
            Opcode = ISD::STRICT_FMA;
            break;
          case Intrinsic::experimental_constrained_sqrt:
            Opcode = ISD::STRICT_FSQRT;
            break;
          case Intrinsic::experimental_constrained_floor:
            Opcode = ISD::STRICT_FFLOOR;
            break;
          case Intrinsic::experimental_constrained_ceil:
            Opcode = ISD::STRICT_FCEIL;
            break;
          case Intrinsic::experimental_constrained_trunc:
            Opcode = ISD::STRICT_FTRUNC;
            break;
          case Intrinsic::experimental_constrained_rint:
            Opcode = ISD::STRICT_FRINT;
            break;
          case Intrinsic::experimental_constrained_lrint:
            Opcode = ISD::STRICT_LRINT;
            break;
          case Intrinsic::experimental_constrained_llrint:
            Opcode = ISD::STRICT_LLRINT;
            break;
          case Intrinsic::experimental_constrained_nearbyint:
            Opcode = ISD::STRICT_FNEARBYINT;
            break;
          case Intrinsic::experimental_constrained_round:
            Opcode = ISD::STRICT_FROUND;
            break;
          case Intrinsic::experimental_constrained_lround:
            Opcode = ISD::STRICT_LROUND;
            break;
          case Intrinsic::experimental_constrained_llround:
            Opcode = ISD::STRICT_LLROUND;
            break;
          case Intrinsic::experimental_constrained_minnum:
            Opcode = ISD::STRICT_FMINNUM;
            break;
          case Intrinsic::experimental_constrained_maxnum:
            Opcode = ISD::STRICT_FMAXNUM;
            break;
          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO;      break;
          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO;      break;
          }
        }

        // PowerPC does not use [US]DIVREM or other library calls for
        // operations on regular types which are not otherwise library calls
        // (i.e. soft float or atomics). If adapting for targets that do,
        // additional care is required here.

        LibFunc Func;
        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
            LibInfo->getLibFunc(F->getName(), Func) &&
            LibInfo->hasOptimizedCodeGen(Func)) {
          // Non-read-only functions are never treated as intrinsics.
          if (!CI->onlyReadsMemory())
            return true;

          // Conversion happens only for FP calls.
          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
            return true;

          switch (Func) {
          default: return true;
          case LibFunc_copysign:
          case LibFunc_copysignf:
            continue; // ISD::FCOPYSIGN is never a library call.
          case LibFunc_copysignl:
            return true;
          case LibFunc_fabs:
          case LibFunc_fabsf:
          case LibFunc_fabsl:
            continue; // ISD::FABS is never a library call.
          case LibFunc_sqrt:
          case LibFunc_sqrtf:
          case LibFunc_sqrtl:
            Opcode = ISD::FSQRT; break;
          case LibFunc_floor:
          case LibFunc_floorf:
          case LibFunc_floorl:
            Opcode = ISD::FFLOOR; break;
          case LibFunc_nearbyint:
          case LibFunc_nearbyintf:
          case LibFunc_nearbyintl:
            Opcode = ISD::FNEARBYINT; break;
          case LibFunc_ceil:
          case LibFunc_ceilf:
          case LibFunc_ceill:
            Opcode = ISD::FCEIL; break;
          case LibFunc_rint:
          case LibFunc_rintf:
          case LibFunc_rintl:
            Opcode = ISD::FRINT; break;
          case LibFunc_round:
          case LibFunc_roundf:
          case LibFunc_roundl:
            Opcode = ISD::FROUND; break;
          case LibFunc_trunc:
          case LibFunc_truncf:
          case LibFunc_truncl:
            Opcode = ISD::FTRUNC; break;
          case LibFunc_fmin:
          case LibFunc_fminf:
          case LibFunc_fminl:
            Opcode = ISD::FMINNUM; break;
          case LibFunc_fmax:
          case LibFunc_fmaxf:
          case LibFunc_fmaxl:
            Opcode = ISD::FMAXNUM; break;
          }
        }

        if (Opcode) {
          EVT EVTy =
              TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true);

          if (EVTy == MVT::Other)
            return true;

          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
            continue;
          else if (EVTy.isVector() &&
                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
            continue;

          return true;
        }
      }

      return true;
    } else if (isa<BinaryOperator>(J) &&
               (J->getType()->getScalarType()->isFP128Ty() ||
                J->getType()->getScalarType()->isPPC_FP128Ty())) {
      // Most operations on f128 or ppc_f128 values become calls.
      return true;
    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
      CastInst *CI = cast<CastInst>(J);
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
          isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
          isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
        return true;
    } else if (isLargeIntegerTy(!TM.isPPC64(),
                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
                J->getOpcode() == Instruction::SRem)) {
      return true;
    } else if (!TM.isPPC64() &&
               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::Shl ||
                J->getOpcode() == Instruction::AShr ||
                J->getOpcode() == Instruction::LShr)) {
      // Only on PPC32, for 128-bit integers (specifically not 64-bit
      // integers), these might be runtime calls.
      return true;
    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
      // On PowerPC, indirect jumps use the counter register.
      return true;
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
        return true;
    }

    // FREM is always a call.
    if (J->getOpcode() == Instruction::FRem)
      return true;

    if (ST->useSoftFloat()) {
      switch(J->getOpcode()) {
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv:
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::FPToUI:
      case Instruction::FPToSI:
      case Instruction::UIToFP:
      case Instruction::SIToFP:
      case Instruction::FCmp:
        return true;
      }
    }

    for (Value *Operand : J->operands())
      if (memAddrUsesCTR(Operand, TM, Visited))
        return true;
  }

  return false;
}

bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                          AssumptionCache &AC,
                                          TargetLibraryInfo *LibInfo,
                                          HardwareLoopInfo &HWLoopInfo) {
  const PPCTargetMachine &TM = ST->getTargetMachine();
  TargetSchedModel SchedModel;
  SchedModel.init(ST);

  // Do not convert small short loops to CTR loops.
  unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    SmallPtrSet<const Value *, 32> EphValues;
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *this, EphValues);
    // 6 is an approximate latency for the mtctr instruction.
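    // For example, with an issue width of 4, a short-trip-count loop whose
    // body is at most 6 * 4 = 24 instructions is left as a regular loop.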
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  SmallPtrSet<const Value *, 4> Visited;
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(*I, LibInfo, Visited))
      return false;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (auto &BB : ExitingBlocks) {
    Instruction *TI = BB->getTerminator();
    if (!TI) continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !BI->extractProfMetadata(TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if ((TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return false;
    }
  }

  // If an exit block has a PHI that accesses a TLS variable as one of the
  // incoming values from the loop, we cannot produce a CTR loop because the
  // address for that value will be computed in the loop.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  for (auto &BB : ExitBlocks) {
    for (auto &PHI : BB->phis()) {
      for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
           Idx++) {
        const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
        const Value *IncomingValue = PHI.getIncomingValue(Idx);
        if (L->contains(IncomingBB) &&
            memAddrUsesCTR(IncomingValue, TM, Visited))
          return false;
      }
    }
  }

  LLVMContext &C = L->getHeader()->getContext();
  HWLoopInfo.CountType = TM.isPPC64() ?
    Type::getInt64Ty(C) : Type::getInt32Ty(C);
  HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
  return true;
}

void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP) {
  if (ST->getCPUDirective() == PPC::DIR_A2) {
    // The A2 is in-order with a deep pipeline, and concatenation unrolling
    // helps expose latency-hiding opportunities to the instruction scheduler.
    UP.Partial = UP.Runtime = true;

    // We unroll a lot on the A2 (hundreds of instructions), and the benefits
    // often outweigh the cost of a division to compute the trip count.
    UP.AllowExpensiveTripCount = true;
  }

  BaseT::getUnrollingPreferences(L, SE, UP);
}

void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                       TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}
// This function returns true to allow using coldcc calling convention.
// Returning true results in coldcc being used for functions which are cold at
// all call sites when the callers of the functions are not calling any other
// non-coldcc functions.
bool PPCTTIImpl::useColdCCForColdCall(Function &F) {
  return EnablePPCColdCC;
}

bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
  // On the A2, always unroll aggressively.
  if (ST->getCPUDirective() == PPC::DIR_A2)
    return true;

  return LoopHasReductions;
}

PPCTTIImpl::TTI::MemCmpExpansionOptions
PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  TTI::MemCmpExpansionOptions Options;
  Options.LoadSizes = {8, 4, 2, 1};
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  return Options;
}
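// For example, with the load sizes above a known 15-byte memcmp can expand
// into one 8-, one 4-, one 2- and one 1-byte load per buffer instead of a
// library call, as long as the MaxNumLoads budget is respected.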

bool PPCTTIImpl::enableInterleavedAccessVectorization() {
  return true;
}

unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  assert(ClassID == GPRRC || ClassID == FPRRC ||
         ClassID == VRRC || ClassID == VSXRC);
  if (ST->hasVSX()) {
    assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC);
    return ClassID == VSXRC ? 64 : 32;
  }
  assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
  return 32;
}

unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
  if (Vector)
    return ST->hasVSX() ? VSXRC : VRRC;
  else if (Ty && (Ty->getScalarType()->isFloatTy() ||
                  Ty->getScalarType()->isDoubleTy()))
    return ST->hasVSX() ? VSXRC : FPRRC;
  else if (Ty && (Ty->getScalarType()->isFP128Ty() ||
                  Ty->getScalarType()->isPPC_FP128Ty()))
    return VRRC;
  else if (Ty && Ty->getScalarType()->isHalfTy())
    return VSXRC;
  else
    return GPRRC;
}

const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {

  switch (ClassID) {
  default:
    llvm_unreachable("unknown register class");
    return "PPC::unknown register class";
  case GPRRC:       return "PPC::GPRRC";
  case FPRRC:       return "PPC::FPRRC";
  case VRRC:        return "PPC::VRRC";
  case VSXRC:       return "PPC::VSXRC";
  }
}

TypeSize
PPCTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(ST->isPPC64() ? 64 : 32);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(ST->hasAltivec() ? 128 : 0);
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

unsigned PPCTTIImpl::getCacheLineSize() const {
  // Check first if the user specified a custom line size.
  if (CacheLineSize.getNumOccurrences() > 0)
    return CacheLineSize;

  // Starting with P7 we have a cache line size of 128.
  unsigned Directive = ST->getCPUDirective();
  // Assume that Future CPU has the same cache line size as the others.
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
      Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
      Directive == PPC::DIR_PWR_FUTURE)
    return 128;

  // On other processors return a default of 64 bytes.
  return 64;
}

unsigned PPCTTIImpl::getPrefetchDistance() const {
  return 300;
}

unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
  unsigned Directive = ST->getCPUDirective();
  // The 440 has no SIMD support, but floating-point instructions
  // have a 5-cycle latency, so unroll by 5x for latency hiding.
  if (Directive == PPC::DIR_440)
    return 5;

  // The A2 has no SIMD support, but floating-point instructions
  // have a 6-cycle latency, so unroll by 6x for latency hiding.
  if (Directive == PPC::DIR_A2)
    return 6;

  // FIXME: For lack of any better information, do no harm...
  if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
    return 1;

  // For P7 and P8, floating-point instructions have a 6-cycle latency and
  // there are two execution units, so unroll by 12x for latency hiding.
  // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
  // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready
  // Assume that future is the same as the others.
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
      Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
      Directive == PPC::DIR_PWR_FUTURE)
    return 12;

  // For most things, modern systems have two execution units (and
  // out-of-order execution).
  return 2;
}

// Adjust the cost of vector instructions on targets where there is overlap
// between the vector and scalar units, thereby reducing the overall throughput
// of vector code wrt. scalar code.
InstructionCost PPCTTIImpl::vectorCostAdjustment(InstructionCost Cost,
                                                 unsigned Opcode, Type *Ty1,
                                                 Type *Ty2) {
  if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
    return Cost;

  std::pair<int, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
  // If type legalization involves splitting the vector, we don't want to
  // double the cost at every step - only the last step.
  if (LT1.first != 1 || !LT1.second.isVector())
    return Cost;

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  if (TLI->isOperationExpand(ISD, LT1.second))
    return Cost;

  if (Ty2) {
    std::pair<int, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
    if (LT2.first != 1 || !LT2.second.isVector())
      return Cost;
  }

  return Cost * 2;
}
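// For example, on a subtarget where vectorsUseTwoUnits() is true, a v4i32
// add that legalizes to a single vector register (LT.first == 1) has its
// cost doubled above, while a type that must be split is only doubled on
// the final, already-legal step.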

int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                       TTI::TargetCostKind CostKind,
                                       TTI::OperandValueKind Op1Info,
                                       TTI::OperandValueKind Op2Info,
                                       TTI::OperandValueProperties Opd1PropInfo,
                                       TTI::OperandValueProperties Opd2PropInfo,
                                       ArrayRef<const Value *> Args,
                                       const Instruction *CxtI) {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
  // TODO: Handle more cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info, Opd1PropInfo,
                                         Opd2PropInfo, Args, CxtI);

  // Fallback to the default implementation.
  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                           Op2Info,
                                           Opd1PropInfo, Opd2PropInfo);
  return *vectorCostAdjustment(Cost, Opcode, Ty, nullptr).getValue();
}

int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
                               ArrayRef<int> Mask, int Index, Type *SubTp) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

  // PPC, for both Altivec/VSX, supports cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
  // instruction). We need one such shuffle instruction for each actual
  // register (this is not true for arbitrary shuffles, but is true for the
  // structured types of shuffles covered by TTI::ShuffleKind).
  return *vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp,
                               nullptr)
              .getValue();
}

InstructionCost PPCTTIImpl::getCFInstrCost(unsigned Opcode,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) {
  if (CostKind != TTI::TCK_RecipThroughput)
    return Opcode == Instruction::PHI ? 0 : 1;
  // Branches are assumed to be predicted.
  return 0;
}

InstructionCost PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                             Type *Src,
                                             TTI::CastContextHint CCH,
                                             TTI::TargetCostKind CostKind,
                                             const Instruction *I) {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  InstructionCost Cost =
      BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  Cost = vectorCostAdjustment(Cost, Opcode, Dst, Src);
  // TODO: Allow non-throughput costs that aren't binary.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost == 0 ? 0 : 1;
  return Cost;
}

InstructionCost PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                               Type *CondTy,
                                               CmpInst::Predicate VecPred,
                                               TTI::TargetCostKind CostKind,
                                               const Instruction *I) {
  InstructionCost Cost =
      BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost;
  return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
}

int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
  assert(Val->isVectorTy() && "This must be a vector type");

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  int Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
  Cost = *vectorCostAdjustment(Cost, Opcode, Val, nullptr).getValue();

  if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
    // Double-precision scalars are already located in index #0 (or #1 if LE).
    if (ISD == ISD::EXTRACT_VECTOR_ELT &&
        Index == (ST->isLittleEndian() ? 1 : 0))
      return 0;

    return Cost;

  } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
    if (ST->hasP9Altivec()) {
      if (ISD == ISD::INSERT_VECTOR_ELT)
        // A move-to VSR and a permute/insert. Assume vector operation cost
        // for both (cost will be 2x on P9).
        return *vectorCostAdjustment(2, Opcode, Val, nullptr).getValue();

      // It's an extract. Maybe we can do a cheap move-from VSR.
      unsigned EltSize = Val->getScalarSizeInBits();
      if (EltSize == 64) {
        unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
        if (Index == MfvsrdIndex)
          return 1;
      } else if (EltSize == 32) {
        unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
        if (Index == MfvsrwzIndex)
          return 1;
      }

      // We need a vector extract (or mfvsrld). Assume vector operation cost.
      // The cost of the load constant for a vector extract is disregarded
      // (invariant, easily schedulable).
      return *vectorCostAdjustment(1, Opcode, Val, nullptr).getValue();

    } else if (ST->hasDirectMove())
      // Assume permute has standard cost.
      // Assume move-to/move-from VSR have 2x standard cost.
      return 3;
  }

  // Estimated cost of a load-hit-store delay. This was obtained
  // experimentally as a minimum needed to prevent unprofitable
  // vectorization for the paq8p benchmark. It may need to be
  // raised further if other unprofitable cases remain.
  unsigned LHSPenalty = 2;
  if (ISD == ISD::INSERT_VECTOR_ELT)
    LHSPenalty += 7;
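  // That is, the heuristic below charges an element insert 2 + 7 = 9 on top
  // of the base cost, and an element extract 2.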

  // Vector element insert/extract with Altivec is very expensive,
  // because they require store and reload with the attendant
  // processor stall for load-hit-store. Until VSX is available,
  // these need to be estimated as very costly.
  if (ISD == ISD::EXTRACT_VECTOR_ELT ||
      ISD == ISD::INSERT_VECTOR_ELT)
    return LHSPenalty + Cost;

  return Cost;
}

InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                            MaybeAlign Alignment,
                                            unsigned AddressSpace,
                                            TTI::TargetCostKind CostKind,
                                            const Instruction *I) {
  if (TLI->getValueType(DL, Src, true) == MVT::Other)
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind);
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

  InstructionCost Cost =
      BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost;

  Cost = *vectorCostAdjustment(Cost, Opcode, Src, nullptr).getValue();

  bool IsAltivecType = ST->hasAltivec() &&
                       (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
                        LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
  bool IsVSXType = ST->hasVSX() &&
                   (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);

  // VSX has 32b/64b load instructions. Legalization can handle loading of
  // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
  // PPCTargetLowering can't compute the cost appropriately. So here we
  // explicitly check this case.
  unsigned MemBytes = Src->getPrimitiveSizeInBits();
  if (Opcode == Instruction::Load && ST->hasVSX() && IsAltivecType &&
      (MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32)))
    return 1;

  // Aligned loads and stores are easy.
  unsigned SrcBytes = LT.second.getStoreSize();
  if (!SrcBytes || !Alignment || *Alignment >= SrcBytes)
    return Cost;

  // If we can use the permutation-based load sequence, then this is also
  // relatively cheap (not counting loop-invariant instructions): one load plus
  // one permute (the last load in a series has extra cost, but we're
  // neglecting that here). Note that on the P7, we could do unaligned loads
  // for Altivec types using the VSX instructions, but that's more expensive
  // than using the permutation-based load sequence. On the P8, that's no
  // longer true.
  if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
      *Alignment >= LT.second.getScalarType().getStoreSize())
    return Cost + LT.first; // Add the cost of the permutations.

  // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
  // P7, unaligned vector loads are more expensive than the permutation-based
  // load sequence, so that might be used instead, but regardless, the net cost
  // is about the same (not counting loop-invariant instructions).
  if (IsVSXType || (ST->hasVSX() && IsAltivecType))
    return Cost;

  // Newer PPC supports unaligned memory access.
  if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))
    return Cost;

  // PPC in general does not support unaligned loads and stores. They'll need
  // to be decomposed based on the alignment factor.

  // Add the cost of each scalar load or store.
  assert(Alignment);
  Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);
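  // For example, a 16-byte vector access with only 4-byte alignment adds
  // LT.first * ((16 / 4) - 1) = LT.first * 3 scalar operations here.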

  // For a vector type, there is also scalarization overhead (only for
  // stores, loads are expanded using the vector-load + permutation sequence,
  // which is much less expensive).
  if (Src->isVectorTy() && Opcode == Instruction::Store)
    for (int i = 0, e = cast<FixedVectorType>(Src)->getNumElements(); i < e;
         ++i)
      Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);

  return Cost;
}

InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {
  if (UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);

  assert(isa<VectorType>(VecTy) &&
         "Expect a vector type for interleaved memory op");

  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);

  // Firstly, the cost of load/store operation.
  InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
                                         AddressSpace, CostKind);

  // PPC, for both Altivec/VSX, supports cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
  // instruction). For each result vector, we need one shuffle per incoming
  // vector (except that the first shuffle can take two incoming vectors
  // because it does not need to take itself).
  Cost += Factor * (LT.first - 1);
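  // For example, an interleave factor of 2 on a type that legalizes to two
  // registers (LT.first == 2) adds 2 * (2 - 1) = 2 shuffles on top of the
  // memory operation cost.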

  return Cost;
}

InstructionCost
PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                  TTI::TargetCostKind CostKind) {
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                            LoopInfo *LI, DominatorTree *DT,
                            AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
    if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo))
      return false; // Stop search.

  HardwareLoopInfo HWLoopInfo(L);

  if (!HWLoopInfo.canAnalyze(*LI))
    return false;

  if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
    return false;

  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
    return false;

  *BI = HWLoopInfo.ExitBranch;
  return true;
}

bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                               TargetTransformInfo::LSRCost &C2) {
  // PowerPC default behaviour here is "instruction number 1st priority".
  // If LsrNoInsnsCost is set, fall back to the default implementation.
  if (!LsrNoInsnsCost)
    return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
                    C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
                    C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  else
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
}

bool PPCTTIImpl::isNumRegsMajorCostOfLSR() {
  return false;
}

bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                    MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x_be:
  case Intrinsic::ppc_vsx_lxvw4x_be:
  case Intrinsic::ppc_vsx_lxvl:
  case Intrinsic::ppc_vsx_lxvll:
  case Intrinsic::ppc_vsx_lxvp: {
    Info.PtrVal = Inst->getArgOperand(0);
    Info.ReadMem = true;
    Info.WriteMem = false;
    return true;
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x_be:
  case Intrinsic::ppc_vsx_stxvw4x_be:
  case Intrinsic::ppc_vsx_stxvl:
  case Intrinsic::ppc_vsx_stxvll:
  case Intrinsic::ppc_vsx_stxvp: {
    Info.PtrVal = Inst->getArgOperand(1);
    Info.ReadMem = false;
    Info.WriteMem = true;
    return true;
  }
  default:
    break;
  }

  return false;
}
llvm::PPC::DIR_PWR7
@ DIR_PWR7
Definition: PPCSubtarget.h:60
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:26
llvm::EngineKind::Kind
Kind
Definition: ExecutionEngine.h:524
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:586
llvm::PPCTTIImpl::VSXRC
@ VSXRC
Definition: PPCTargetTransformInfo.h:91
llvm::BasicTTIImplBase< PPCTTIImpl >::getArithmeticInstrCost
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:688
llvm::ISD::STRICT_FSETCC
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:456
llvm::BasicTTIImplBase< PPCTTIImpl >::DL
const DataLayout & DL
Definition: TargetTransformInfoImpl.h:38
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:480
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::PPCSubtarget::hasPOPCNTD
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:333
llvm::TargetTransformInfo::LSRCost::NumRegs
unsigned NumRegs
Definition: TargetTransformInfo.h:413
llvm::ISD::UMULO
@ UMULO
Definition: ISDOpcodes.h:312
llvm::ISD::STRICT_FSQRT
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:392
llvm
Definition: AllocatorList.h:23
llvm::SystemZISD::TM
@ TM
Definition: SystemZISelLowering.h:65
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:62
llvm::PPCTTIImpl::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: PPCTargetTransformInfo.cpp:909
llvm::HardwareLoopInfo::LoopDecrement
Value * LoopDecrement
Definition: TargetTransformInfo.h:101
llvm::InstructionCost::getValue
Optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Definition: InstructionCost.h:68
InstCombiner.h
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:722
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::InstCombiner::getDominatorTree
DominatorTree & getDominatorTree() const
Definition: InstCombiner.h:369
llvm::InlineAsm::ConstraintInfoVector
std::vector< ConstraintInfo > ConstraintInfoVector
Definition: InlineAsm.h:116
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:883
llvm::ISD::STRICT_FMAXNUM
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:404
llvm::TargetTransformInfoImplBase::getIntImmCostIntrin
unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:363
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:529
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:586
llvm::ISD::STRICT_FMINNUM
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:405
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
llvm::TargetTransformInfoImplCRTPBase< PPCTTIImpl >::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfoImpl.h:909
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:317
llvm::TLSModel::GeneralDynamic
@ GeneralDynamic
Definition: CodeGen.h:43
llvm::PPCTTIImpl::isLSRCostLess
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2)
Definition: PPCTargetTransformInfo.cpp:1247
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::InstCombiner::Builder
BuilderTy & Builder
Definition: InstCombiner.h:56
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:30
llvm::PPCTTIImpl::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1013
llvm::TargetTransformInfoImplBase::isLSRCostLess
bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const
Definition: TargetTransformInfoImpl.h:206
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1643
llvm::PPCSubtarget::hasP8Vector
bool hasP8Vector() const
Definition: PPCSubtarget.h:269
llvm::PPCSubtarget::isLittleEndian
bool isLittleEndian() const
Definition: PPCSubtarget.h:249
llvm::PPC::DIR_PWR10
@ DIR_PWR10
Definition: PPCSubtarget.h:63
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:2945
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:924
llvm::PPCSubtarget::hasVSX
bool hasVSX() const
Definition: PPCSubtarget.h:267
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
DisablePPCConstHoist
static cl::opt< bool > DisablePPCConstHoist("disable-ppc-constant-hoisting", cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden)
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:98
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:460
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::TargetTransformInfo::LSRCost::NumIVMuls
unsigned NumIVMuls
Definition: TargetTransformInfo.h:415
llvm::isShiftedMask_32
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:480
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:101
llvm::PPC::DIR_PWR9
@ DIR_PWR9
Definition: PPCSubtarget.h:62
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:476
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1581
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:529
llvm::getOrEnforceKnownAlignment
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1328
llvm::PPCTTIImpl::getIntImmCostIntrin
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:200
llvm::Optional
Definition: APInt.h:33
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:154
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::PPCTTIImpl::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
Definition: PPCTargetTransformInfo.cpp:710
llvm::PPCSubtarget::getTargetMachine
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:214
llvm::PPCTTIImpl::getRegisterClassName
const char * getRegisterClassName(unsigned ClassID) const
Definition: PPCTargetTransformInfo.cpp:861
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
llvm::PPCSubtarget::vectorsUseTwoUnits
bool vectorsUseTwoUnits() const
Definition: PPCSubtarget.h:292
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:492
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:158
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition: TargetTransformInfo.h:485
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:204
llvm::PPCTTIImpl::getIntImmCost
int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:170
llvm::TargetTransformInfoImplBase::getIntImmCostInst
unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
Definition: TargetTransformInfoImpl.h:357
llvm::TargetTransformInfo::LSRCost::Insns
unsigned Insns
TODO: Some of these could be merged.
Definition: TargetTransformInfo.h:412
llvm::PPCTTIImpl::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR()
Definition: PPCTargetTransformInfo.cpp:1260
llvm::CastInst::getDestTy
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:686
F
#define F(x, y, z)
Definition: MD5.cpp:56
KnownBits.h
llvm::LoopBase::block_end
block_iterator block_end() const
Definition: LoopInfo.h:177
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::TargetTransformInfo::LSRCost::AddRecCost
unsigned AddRecCost
Definition: TargetTransformInfo.h:414
floor
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
Definition: README-FPStack.txt:54
llvm::ISD::STRICT_FROUND
@ STRICT_FROUND
Definition: ISDOpcodes.h:408
llvm::PPCTTIImpl::getArithmeticInstrCost
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: PPCTargetTransformInfo.cpp:968
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:247
llvm::TargetSchedModel::init
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
Definition: TargetSchedule.cpp:63
llvm::PPCTTIImpl::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: PPCTargetTransformInfo.cpp:68
llvm::TargetTransformInfo::LSRCost::SetupCost
unsigned SetupCost
Definition: TargetTransformInfo.h:418
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:870
llvm::PPCTTIImpl::GPRRC
@ GPRRC
Definition: PPCTargetTransformInfo.h:91
CommandLine.h
CodeMetrics.h
TargetLowering.h
llvm::PPC::DIR_A2
@ DIR_A2
Definition: PPCSubtarget.h:50
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lowering.
Definition: TargetLowering.h:1100
llvm::TargetSchedModel::getIssueWidth
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
Definition: TargetSchedule.h:99
llvm::ISD::STRICT_FRINT
@ STRICT_FRINT
Definition: ISDOpcodes.h:402
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:845
llvm::PPCTTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: PPCTargetTransformInfo.cpp:835
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1079
llvm::User
Definition: User.h:44
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:34
llvm::ISD::STRICT_FNEARBYINT
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:403
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
C
(vector float) vec_cmpeq(*A, *B)
Definition: README_ALTIVEC.txt:86
LsrNoInsnsCost
static cl::opt< bool > LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false), cl::desc("Do not add instruction count to lsr cost model"))
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
llvm::isShiftedMask_64
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
Definition: MathExtras.h:486
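A sketch of what "a non-empty sequence of ones with the remainder zero" accepts and rejects (hypothetical standalone program):

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  assert(llvm::isShiftedMask_64(0x00FF0000ULL)); // contiguous ones, shifted up
  assert(!llvm::isShiftedMask_64(0));            // the empty sequence is rejected
  assert(!llvm::isShiftedMask_64(0xF0F0ULL));    // ones are not contiguous
  return 0;
}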
SI
@ SI
Definition: SIInstrInfo.cpp:7342
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:868
llvm::LoopBase::end
iterator end() const
Definition: LoopInfo.h:155
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::ISD::LLROUND
@ LLROUND
Definition: ISDOpcodes.h:872
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:235
llvm::PPCSubtarget::isISA3_0
bool isISA3_0() const
Definition: PPCSubtarget.h:322
llvm::InlineAsm::isInput
@ isInput
Definition: InlineAsm.h:92
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:119
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:277
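getLibFunc is commonly paired with hasOptimizedCodeGen (indexed below) to gate library-call transforms. A hedged sketch; the helper name is invented, and the TargetLibraryInfo reference is assumed to come from the usual TargetLibraryAnalysis:

#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"

// Map a callee name to its LibFunc enum value, then ask whether codegen
// has an optimized lowering for that function (e.g. sqrt on many targets).
static bool isOptimizedLibCall(const llvm::TargetLibraryInfo &TLI,
                               llvm::StringRef Name) {
  llvm::LibFunc LF;
  return TLI.getLibFunc(Name, LF) && TLI.hasOptimizedCodeGen(LF);
}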
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:154
llvm::PPCSubtarget::isPPC64
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
Definition: PPCSubtarget.cpp:241
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1631
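A small sketch of the zero-extension semantics (hypothetical standalone program):

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  llvm::APInt V(16, 0xFFFF);         // 16-bit all-ones value
  assert(V.getZExtValue() == 65535); // widened to uint64_t without sign extension
  return 0;
}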
llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2404
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:867
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:866
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1770
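UndefValue::get combines with CreateInsertElement and getInt32 (both indexed on this page) in the usual build-a-vector-lane-by-lane pattern. A hedged sketch; the function name is invented and the IRBuilder is assumed to be positioned at a valid insertion point:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

// Start from an undef <4 x Ty> vector and populate lane 0 with Scalar.
static llvm::Value *insertIntoUndef(llvm::IRBuilder<> &B, llvm::Value *Scalar) {
  llvm::Type *VecTy = llvm::FixedVectorType::get(Scalar->getType(), 4);
  llvm::Value *Vec = llvm::UndefValue::get(VecTy);
  return B.CreateInsertElement(Vec, Scalar, B.getInt32(0));
}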
llvm::PPCTTIImpl::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1108
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:885
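The vector-splat behavior called out in the brief, sketched standalone (function name is hypothetical):

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

static void splatExample() {
  llvm::LLVMContext Ctx;
  llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
  llvm::Constant *Scalar = llvm::ConstantInt::get(I32, 42); // i32 42
  // For a vector type, the same call yields <4 x i32> <42, 42, 42, 42>.
  llvm::Constant *Splat =
      llvm::ConstantInt::get(llvm::FixedVectorType::get(I32, 4), 42);
  (void)Scalar;
  (void)Splat;
}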
llvm::PPCTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: PPCTargetTransformInfo.cpp:803
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:26
llvm::ISD::SMULO
@ SMULO
Same as the overflow-aware addition nodes, but for multiplication.
Definition: ISDOpcodes.h:311
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:924
llvm::PPCTTIImpl::FPRRC
@ FPRRC
Definition: PPCTargetTransformInfo.h:91
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:154
llvm::CastInst::getSrcTy
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:684
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::BasicTTIImplBase< PPCTTIImpl >::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:931
llvm::BasicTTIImplBase< PPCTTIImpl >::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:768
llvm::None
const NoneType None
Definition: None.h:23
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:284
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:116
llvm::PPCTTIImpl::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: PPCTargetTransformInfo.cpp:1189
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:586
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1297
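The point of IEEE maxNum semantics, versus a plain select, is that a quiet NaN operand is discarded rather than propagated; minnum (indexed below) mirrors this. A hypothetical standalone check:

#include "llvm/ADT/APFloat.h"
#include <cassert>

int main() {
  llvm::APFloat One(1.0);
  llvm::APFloat NaN = llvm::APFloat::getQNaN(llvm::APFloat::IEEEdouble());
  llvm::APFloat R = llvm::maxnum(NaN, One); // the NaN loses to the number
  assert(R.compare(One) == llvm::APFloat::cmpEqual);
  return 0;
}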
llvm::PPCTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: PPCTargetTransformInfo.cpp:875
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:78
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::LoopBase::block_begin
block_iterator block_begin() const
Definition: LoopInfo.h:176
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small maximum for that loop.
Definition: ScalarEvolution.cpp:6908
CacheLineSize
static cl::opt< unsigned > CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), cl::desc("The loop prefetch cache line size"))
llvm::InlineAsm
Definition: InlineAsm.h:31
llvm::ISD::STRICT_FSETCCS
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:457
llvm::cl::opt< bool >
llvm::LoopBase< BasicBlock, Loop >::block_iterator
ArrayRef< BasicBlock * >::const_iterator block_iterator
Definition: LoopInfo.h:175
llvm::TargetLoweringBase::getMinimumJumpTableEntries
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
Definition: TargetLoweringBase.cpp:1965
llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2066
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:303
llvm::PPCTTIImpl::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:1219
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
TargetSchedule.h
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:77
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:374
llvm::PPCSubtarget::POPCNTD_Unavailable
@ POPCNTD_Unavailable
Definition: PPCSubtarget.h:74
llvm::PointerType::getUnqual
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the generic address space (address space zero).
Definition: DerivedTypes.h:649
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:409
llvm::InstCombiner::getAssumptionCache
AssumptionCache & getAssumptionCache() const
Definition: InstCombiner.h:367
llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:31
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:109
llvm::TLSModel::LocalDynamic
@ LocalDynamic
Definition: CodeGen.h:44
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:409
llvm::InstCombiner::getDataLayout
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:370
llvm::PPCTTIImpl::getVectorInstrCost
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: PPCTargetTransformInfo.cpp:1042
llvm::PPCSubtarget::hasP9Altivec
bool hasP9Altivec() const
Definition: PPCSubtarget.h:273
llvm::PPCTTIImpl::getPopcntSupport
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
Definition: PPCTargetTransformInfo.cpp:59
llvm::IRBuilderBase::getInt32
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:478
llvm::ISD::STRICT_LRINT
@ STRICT_LRINT
Definition: ISDOpcodes.h:413
llvm::PPC::DIR_E500mc
@ DIR_E500mc
Definition: PPCSubtarget.h:52
llvm::TargetLoweringBase::getMaxExpandSizeMemcmp
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
Definition: TargetLowering.h:1586
llvm::DataLayout::isLittleEndian
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:241
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:58
llvm::PPCTTIImpl::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
Definition: PPCTargetTransformInfo.cpp:1264
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially variable) element number IDX.
Definition: ISDOpcodes.h:505
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:423
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::ISD::LRINT
@ LRINT
Definition: ISDOpcodes.h:873
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:903
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:145
llvm::InlineAsm::ConstraintInfo
Definition: InlineAsm.h:118
llvm::ISD::STRICT_LROUND
@ STRICT_LROUND
Definition: ISDOpcodes.h:411
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:39
llvm::elfabi::ELFSymbolType::Func
@ Func
llvm::PPCTargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
Definition: PPCISelLowering.cpp:15898
llvm::PPCTTIImpl::enableInterleavedAccessVectorization
bool enableInterleavedAccessVectorization()
Definition: PPCTargetTransformInfo.cpp:831
llvm::IRBuilderBase::CreateExtractElement
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2391
llvm::TargetTransformInfo::LSRCost::ScaleCost
unsigned ScaleCost
Definition: TargetTransformInfo.h:419
llvm::BasicTTIImplBase< PPCTTIImpl >::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:1074
llvm::TargetTransformInfo::PSK_SlowHardware
@ PSK_SlowHardware
Definition: TargetTransformInfo.h:586
SmallCTRLoopThreshold
static cl::opt< unsigned > SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register."))
llvm::ISD::STRICT_LLRINT
@ STRICT_LLRINT
Definition: ISDOpcodes.h:414
llvm::PPCTTIImpl::getCFInstrCost
int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1005
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:895
llvm::PPCTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Definition: PPCTargetTransformInfo.cpp:788
llvm::PPCTTIImpl::useColdCCForColdCall
bool useColdCCForColdCall(Function &F)
Definition: PPCTargetTransformInfo.cpp:811
llvm::PPC::DIR_E5500
@ DIR_E5500
Definition: PPCSubtarget.h:53
llvm::TargetTransformInfo::MemCmpExpansionOptions::MaxNumLoads
unsigned MaxNumLoads
Definition: TargetTransformInfo.h:755
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is true if the memcmp result is only compared with zero.
Definition: TargetTransformInfo.h:750
llvm::PPCSubtarget::hasDirectMove
bool hasDirectMove() const
Definition: PPCSubtarget.h:304
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:262
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1079
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:149
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::PPCTTIImpl::getIntImmCostInst
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
Definition: PPCTargetTransformInfo.cpp:235
llvm::ISD::STRICT_FTRUNC
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:410
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
if
Definition: CMakeLists.txt:14
llvm::ISD::LLRINT
@ LLRINT
Definition: ISDOpcodes.h:874
EnablePPCColdCC
static cl::opt< bool > EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false), cl::desc("Enable using coldcc calling conv for cold " "internal functions"))
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
trunc
Definition: README-FPStack.txt:63
llvm::PPCTTIImpl::canSaveCmp
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)
Definition: PPCTargetTransformInfo.cpp:1224
llvm::TargetLoweringBase::InstructionOpcodeToISD
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Definition: TargetLoweringBase.cpp:1731
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:432
llvm::PPCTTIImpl::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: PPCTargetTransformInfo.cpp:846
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:96
llvm::BasicTTIImplBase< PPCTTIImpl >::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:496
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:281
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::PPCTTIImpl::enableAggressiveInterleaving
bool enableAggressiveInterleaving(bool LoopHasReductions)
Definition: PPCTargetTransformInfo.cpp:815
llvm::TargetTransformInfo::MemCmpExpansionOptions::LoadSizes
SmallVector< unsigned, 8 > LoadSizes
Definition: TargetTransformInfo.h:758
llvm::TargetTransformInfoImplBase::getIntImmCost
unsigned getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:352
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:853
llvm::BasicTTIImplBase< PPCTTIImpl >::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Definition: BasicTTIImpl.h:434
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:163
llvm::isInt< 16 >
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:371
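A quick sketch of the signed 16-bit immediate check, the kind of range test an immediate cost model performs (hypothetical standalone program):

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  assert(llvm::isInt<16>(32767));  // INT16_MAX fits a signed 16-bit field
  assert(!llvm::isInt<16>(32768)); // one past INT16_MAX does not
  assert(llvm::isInt<16>(-32768)); // INT16_MIN fits
  return 0;
}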
llvm::PPCTTIImpl::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: PPCTargetTransformInfo.cpp:321
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:80
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1286
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:386
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:884
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:205
llvm::TargetTransformInfo::LSRCost::NumBaseAdds
unsigned NumBaseAdds
Definition: TargetTransformInfo.h:416
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:148
llvm::PPCSubtarget::useSoftFloat
bool useSoftFloat() const
Definition: PPCSubtarget.h:233
CostTable.h
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:288
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:864
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:87
llvm::TypeSize
Definition: TypeSize.h:417
llvm::ISD::STRICT_FCEIL
@ STRICT_FCEIL
Definition: ISDOpcodes.h:406
llvm::PPCTTIImpl::getShuffleCost
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, ArrayRef< int > Mask, int Index, Type *SubTp)
Definition: PPCTargetTransformInfo.cpp:990
llvm::TargetLoweringBase::isOperationExpand
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with custom lowering.
Definition: TargetLowering.h:1199
llvm::PPC::DIR_PWR_FUTURE
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::StringRef::equals_lower
LLVM_NODISCARD bool equals_lower(StringRef RHS) const
equals_lower - Check for string equality, ignoring case.
Definition: StringRef.h:197
llvm::LinearPolySize< TypeSize >::getScalable
static TypeSize getScalable(ScalarTy MinVal)
Definition: TypeSize.h:287
llvm::PPCTTIImpl::vectorCostAdjustment
InstructionCost vectorCostAdjustment(InstructionCost Cost, unsigned Opcode, Type *Ty1, Type *Ty2)
Definition: PPCTargetTransformInfo.cpp:943
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:207
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:151
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:841
llvm::Type::isPPC_FP128Ty
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
Definition: Type.h:160
llvm::TargetTransformInfo::LSRCost::ImmCost
unsigned ImmCost
Definition: TargetTransformInfo.h:417
llvm::TLSModel::Model
Model
Definition: CodeGen.h:42
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:184
llvm::PPCTargetMachine
Common code between 32-bit and 64-bit PowerPC targets.
Definition: PPCTargetMachine.h:25
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
PPCTargetTransformInfo.h
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:924
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:93
llvm::ISD::STRICT_FFLOOR
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:407
Vector
Definition: README_P9.txt:497
llvm::PPCTTIImpl::getPrefetchDistance
unsigned getPrefetchDistance() const override
Definition: PPCTargetTransformInfo.cpp:905
llvm::TargetLoweringBase::getTypeLegalizationCost
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: TargetLoweringBase.cpp:1811
llvm::PPCTTIImpl::enableMemCmpExpansion
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: PPCTargetTransformInfo.cpp:824
memAddrUsesCTR
static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM, SmallPtrSetImpl< const Value * > &Visited)
Definition: PPCTargetTransformInfo.cpp:340
llvm::PPCSubtarget::hasAltivec
bool hasAltivec() const
Definition: PPCSubtarget.h:263
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:924
llvm::LoopBase< BasicBlock, Loop >::iterator
std::vector< Loop * >::const_iterator iterator
Definition: LoopInfo.h:151
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::BasicTTIImplBase< PPCTTIImpl >::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:1226
llvm::PPCTTIImpl::getCacheLineSize
unsigned getCacheLineSize() const override
Definition: PPCTargetTransformInfo.cpp:888
TargetTransformInfo.h
llvm::PPCTTIImpl::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: PPCTargetTransformInfo.cpp:1029
llvm::BasicTTIImplBase< PPCTTIImpl >::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:985
llvm::ISD::STRICT_LLROUND
@ STRICT_LLROUND
Definition: ISDOpcodes.h:412
llvm::HardwareLoopInfo::canAnalyze
bool canAnalyze(LoopInfo &LI)
Definition: TargetTransformInfo.cpp:47
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:68
llvm::ISD::LROUND
@ LROUND
Definition: ISDOpcodes.h:871
llvm::SmallPtrSetImpl< const Value * >
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1450
BB
Definition: README.txt:39
llvm::PPCSubtarget::getCPUDirective
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:193
llvm::TargetLibraryInfo::hasOptimizedCodeGen
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
Definition: TargetLibraryInfo.h:326
llvm::PPC::DIR_PWR8
@ DIR_PWR8
Definition: PPCSubtarget.h:61
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3149
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
BasicTTIImpl.h
llvm::cl::desc
Definition: CommandLine.h:411
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1382
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3005
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:865
llvm::PPCSubtarget::POPCNTD_Slow
@ POPCNTD_Slow
Definition: PPCSubtarget.h:75
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:494
llvm::HardwareLoopInfo::CountType
IntegerType * CountType
Definition: TargetTransformInfo.h:100
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:212
llvm::Directive
Definition: DirectiveEmitter.h:100
llvm::Type::isFP128Ty
bool isFP128Ty() const
Return true if this is 'fp128'.
Definition: Type.h:157
Debug.h
llvm::PPC::DIR_440
@ DIR_440
Definition: PPCSubtarget.h:43
llvm::PPCTTIImpl::VRRC
@ VRRC
Definition: PPCTargetTransformInfo.h:91
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:129
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
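The pair-returning insert doubles as a visited-set membership test, the shape the Visited parameter of memAddrUsesCTR (indexed above) relies on. A hypothetical standalone sketch:

#include "llvm/ADT/SmallPtrSet.h"
#include <cassert>

int main() {
  int A = 0;
  llvm::SmallPtrSet<const int *, 4> Visited;
  assert(Visited.insert(&A).second);  // first insertion: newly added
  assert(!Visited.insert(&A).second); // second insertion: already present
  return 0;
}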
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::BasicTTIImplBase< PPCTTIImpl >::getVectorInstrCost
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:978