//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "aarch64tti"

static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
                                               cl::init(true), cl::Hidden);

bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
                                         const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // Inline a callee if its target-features are a subset of the caller's
  // target-features.
  return (CallerBits & CalleeBits) == CalleeBits;
}

/// Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
int AArch64TTIImpl::getIntImmCost(int64_t Val) {
  // Check if the immediate can be encoded within an instruction.
  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
    return 0;

  if (Val < 0)
    Val = ~Val;

  // Calculate how many moves we will need to materialize this constant.
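  // For example, an arbitrary 64-bit constant such as 0x1234567890abcdef
  // typically expands to one MOVZ plus three MOVK instructions, so the cost
  // reported below would be 4.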
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Val, 64, Insn);
  return Insn.size();
}

/// Calculate the cost of materializing the given constant.
int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  // Sign-extend all constants to a multiple of 64-bit.
  APInt ImmVal = Imm;
  if (BitSize & 0x3f)
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  // Split the constant into 64-bit chunks and calculate the cost for each
  // chunk.
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
    int64_t Val = Tmp.getSExtValue();
    Cost += getIntImmCost(Val);
  }
  // We need at least one instruction to materialize the constant.
  return std::max(1, Cost);
}

int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::Store:
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    ImmIdx = 1;
    break;
  // Always return TCC_Free for the shift value of a shift instruction.
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
      return TTI::TCC_Free;
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

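  // For example, with a 64-bit type NumConstants is 1, so an immediate that
  // needs two or more move instructions is returned at its real cost and
  // becomes a candidate for constant hoisting, while a single-move immediate
  // is reported as free.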
  if (Idx == ImmIdx) {
    int NumConstants = (BitSize + 63) / 64;
    int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
    return (Cost <= NumConstants * TTI::TCC_Basic)
               ? static_cast<int>(TTI::TCC_Free)
               : Cost;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}

int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      int NumConstants = (BitSize + 63) / 64;
      int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
      return (Cost <= NumConstants * TTI::TCC_Basic)
                 ? static_cast<int>(TTI::TCC_Free)
                 : Cost;
    }
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}

TTI::PopcntSupportKind AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (TyWidth == 32 || TyWidth == 64)
    return TTI::PSK_FastHardware;
  // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
  return TTI::PSK_Software;
}

bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                           ArrayRef<const Value *> Args) {

  // A helper that returns a vector type from the given type. The number of
  // elements in type Ty determines the vector width.
  auto toVectorTy = [&](Type *ArgTy) {
    return VectorType::get(ArgTy->getScalarType(),
                           DstTy->getVectorNumElements());
  };

  // Exit early if DstTy is not a vector type whose elements are at least
  // 16-bits wide.
  if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
    return false;

  // Determine if the operation has a widening variant. We consider both the
  // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
  // instructions.
  //
  // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
  // verify that their extending operands are eliminated during code
  // generation.
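  //
  // For example, given %e = zext <8 x i8> %x to <8 x i16>, the IR instruction
  // "add <8 x i16> %a, %e" can be selected to a single uaddw, and an add of
  // two such extends to a single uaddl, so the extend itself ends up free.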
  switch (Opcode) {
  case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
  case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
    break;
  default:
    return false;
  }

  // To be a widening instruction (either the "wide" or "long" versions), the
  // second operand must be a sign- or zero-extend having a single user. We
  // only consider extends having a single user because they may otherwise not
  // be eliminated.
  if (Args.size() != 2 ||
      (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
      !Args[1]->hasOneUse())
    return false;
  auto *Extend = cast<CastInst>(Args[1]);

  // Legalize the destination type and ensure it can be used in a widening
  // operation.
  auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
  unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
  if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
    return false;

  // Legalize the source type and ensure it can be used in a widening
  // operation.
  Type *SrcTy = toVectorTy(Extend->getSrcTy());
  auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
  if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
    return false;

  // Get the total number of vector elements in the legalized types.
  unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
  unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();

  // Return true if the legalized types have the same number of vector elements
  // and the destination element type size is twice that of the source type.
  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
}

int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                     const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // If the cast is observable, and it is used by a widening instruction (e.g.,
  // uaddl, saddw, etc.), it may be free.
  if (I && I->hasOneUse()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
      // If the cast is the second operand, it is free. We will generate either
      // a "wide" or "long" version of the widening instruction.
      if (I == SingleUser->getOperand(1))
        return 0;
      // If the cast is not the second operand, it will be free if it looks the
      // same as the second operand. In this case, we will generate a "long"
      // version of the widening instruction.
      if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
        if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
            cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
          return 0;
    }
  }

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  static const TypeConversionCostTblEntry
  ConversionTbl[] = {
    // The number of shll instructions for the extension.
    // LowerVectorINT_TO_FP:
    // Complex: to v2f32
    // Complex: to v4f32
    // Complex: to v8f32
    // Complex: to v16f32
    // Complex: to v2f64
    // LowerVectorFP_TO_INT
    // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
    // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
  };

  if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
                                                 DstTy.getSimpleVT(),
                                                 SrcTy.getSimpleVT()))
    return Entry->Cost;

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                             VectorType *VecTy,
                                             unsigned Index) {

  // Make sure we were given a valid extend opcode.
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
         "Invalid opcode");

  // We are extending an element we extract from a vector, so the source type
  // of the extend is the element type of the vector.
  auto *Src = VecTy->getElementType();

  // Sign- and zero-extends are for integer types only.
  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");

  // Get the cost for the extract. We compute the cost (if any) for the extend
  // below.
  auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);

  // Legalize the types.
  auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);

  // If the resulting type is still a vector and the destination type is legal,
  // we may get the extension for free. If not, get the default cost for the
  // extend.
  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
    return Cost + getCastInstrCost(Opcode, Dst, Src);

  // The destination type should be larger than the element type. If not, get
  // the default cost for the extend.
  if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
    return Cost + getCastInstrCost(Opcode, Dst, Src);

  switch (Opcode) {
  default:
    llvm_unreachable("Opcode should be either SExt or ZExt");

  // For sign-extends, we only need a smov, which performs the extension
  // automatically.
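  // For example, "smov x0, v0.h[3]" extracts lane 3 and sign-extends it to
  // 64 bits in a single instruction.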
  case Instruction::SExt:
    return Cost;

  // For zero-extends, the extend is performed automatically by a umov unless
  // the destination type is i64 and the element type is i8 or i16.
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
      return Cost;
  }

  // If we are unable to perform the extend for free, get the default cost.
  return Cost + getCastInstrCost(Opcode, Dst, Src);
}

int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                       unsigned Index) {
  assert(Val->isVectorTy() && "This must be a vector type");

  if (Index != -1U) {
    // Legalize the type.
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);

    // This type is legalized to a scalar type.
    if (!LT.second.isVector())
      return 0;

    // The type may be split. Normalize the index to the new type.
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

    // The element at index zero is already inside the vector.
    if (Index == 0)
      return 0;
  }

  // All other insert/extracts cost this much.
  return ST->getVectorInsertExtractBaseCost();
}

int AArch64TTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
  // add in the widening overhead specified by the sub-target. Since the
  // extends feeding widening instructions are performed automatically, they
  // aren't present in the generated code and have a zero cost. By adding a
  // widening overhead here, we attach the total cost of the combined operation
  // to the widening instruction.
  int Cost = 0;
  if (isWideningInstruction(Ty, Opcode, Args))
    Cost += ST->getWideningBaseCost();

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  switch (ISD) {
  default:
    return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                                Opd1PropInfo, Opd2PropInfo);
  case ISD::SDIV:
    if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
        Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
      // On AArch64, scalar signed division by a constant power-of-two is
      // normally expanded to the sequence ADD + CMP + SELECT + SRA.
      // The OperandValue properties may not be the same as those of the
      // previous operation; conservatively assume OP_None.
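      //
      // For example, a signed division of w0 by 8 is roughly lowered as:
      //   add  w1, w0, #7
      //   cmp  w0, #0
      //   csel w1, w1, w0, lt
      //   asr  w0, w1, #3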
      Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      return Cost;
    }
    LLVM_FALLTHROUGH;
  case ISD::UDIV:
    if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
      auto VT = TLI->getValueType(DL, Ty);
      if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
        // Vector signed division by a constant is expanded to the sequence
        // MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division to
        // MULHS + SUB + SRL + ADD + SRL.
        int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
                                             Opd2Info,
                                             TargetTransformInfo::OP_None,
                                             TargetTransformInfo::OP_None);
        int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
                                             Opd2Info,
                                             TargetTransformInfo::OP_None,
                                             TargetTransformInfo::OP_None);
        int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
                                             Opd2Info,
                                             TargetTransformInfo::OP_None,
                                             TargetTransformInfo::OP_None);
        return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
      }
    }

    Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                          Opd1PropInfo, Opd2PropInfo);
    if (Ty->isVectorTy()) {
      // On AArch64, vector divisions are not supported natively and are
      // expanded into scalar divisions of each pair of elements.
      Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
                                     Opd2Info, Opd1PropInfo, Opd2PropInfo);
      Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
                                     Opd2Info, Opd1PropInfo, Opd2PropInfo);
      // TODO: if one of the arguments is scalar, then it's not necessary to
      // double the cost of handling the vector elements.
      Cost += Cost;
    }
    return Cost;

  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // These nodes are marked as 'custom' for combining purposes only.
    // We know that they are legal. See LowerAdd in ISelLowering.
    return (Cost + 1) * LT.first;
  }
}

int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                              const SCEV *Ptr) {
  // Address computations in vectorized code with non-consecutive addresses will
  // likely result in more instructions compared to scalar code where the
  // computation can more often be merged into the index mode. The resulting
  // extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (Ty->isVectorTy() && SE &&
      !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}

int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                       Type *CondTy, const Instruction *I) {

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // We don't lower some vector selects well that are wider than the register
  // width.
  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // We would need this many instructions to hide the scalarization happening.
    const int AmortizationCost = 20;
    static const TypeConversionCostTblEntry
    VectorSelectTbl[] = {
      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }
  }
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}

int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
                                    unsigned Alignment, unsigned AddressSpace,
                                    const Instruction *I) {
  auto LT = TLI->getTypeLegalizationCost(DL, Ty);

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && Alignment < 16) {
    // Unaligned stores are extremely inefficient. We don't split all
    // unaligned 128-bit stores because of the negative impact that has shown
    // in practice on inlined block copy code.
    // We make such stores expensive so that we will only vectorize if there
    // are 6 other instructions getting vectorized.
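    //
    // For example, an underaligned store of a single 128-bit vector on such a
    // core is costed below at LT.first * 2 * 6 = 12.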
    const int AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;
  }

  if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
    unsigned ProfitableNumElements;
    if (Opcode == Instruction::Store)
      // We use a custom trunc store lowering so v.4b should be profitable.
      ProfitableNumElements = 4;
    else
      // We scalarize the loads because there is no v.4b register and we
      // have to promote the elements to v.2.
      ProfitableNumElements = 8;

    if (Ty->getVectorNumElements() < ProfitableNumElements) {
      unsigned NumVecElts = Ty->getVectorNumElements();
      unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
      // We generate 2 instructions per vector element.
      return NumVectorizableInstsToAmortize * NumVecElts * 2;
    }
  }

  return LT.first;
}

int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                               unsigned Factor,
                                               ArrayRef<unsigned> Indices,
                                               unsigned Alignment,
                                               unsigned AddressSpace,
                                               bool UseMaskForCond,
                                               bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");

  if (!UseMaskForCond && !UseMaskForGaps &&
      Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned NumElts = VecTy->getVectorNumElements();
    auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);

    // ldN/stN only support legal vector types of size 64 or 128 in bits.
    // Accesses having vector types that are a multiple of 128 bits can be
    // matched to more than one ldN/stN instruction.
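    //
    // For example, a factor-2 interleaved load of <8 x i32> can be lowered to
    // a single ld2 producing two <4 x i32> registers, so the cost returned
    // below is Factor (2) times one interleaved access.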
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace,
                                           UseMaskForCond, UseMaskForGaps);
}

int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
  int Cost = 0;
  for (auto *I : Tys) {
    if (!I->isVectorTy())
      continue;
    if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
      Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
              getMemoryOpCost(Instruction::Load, I, 128, 0);
  }
  return Cost;
}

unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
  return ST->getMaxInterleaveFactor();
}

// For Falkor, we want to avoid having too many strided loads in a loop since
// that can exhaust the HW prefetcher resources. We adjust the unroller
// MaxCount preference below to attempt to ensure unrolling doesn't create too
// many strided loads.
static void
getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                              TargetTransformInfo::UnrollingPreferences &UP) {
  enum { MaxStridedLoads = 7 };
  auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
    int StridedLoads = 0;
    // FIXME? We could make this more precise by looking at the CFG and
    // e.g. not counting loads in each side of an if-then-else diamond.
    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        LoadInst *LMemI = dyn_cast<LoadInst>(&I);
        if (!LMemI)
          continue;

        Value *PtrValue = LMemI->getPointerOperand();
        if (L->isLoopInvariant(PtrValue))
          continue;

        const SCEV *LSCEV = SE.getSCEV(PtrValue);
        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
          continue;

        // FIXME? We could take pairing of unrolled load copies into account
        // by looking at the AddRec, but we would probably have to limit this
        // to loops with no stores or other memory optimization barriers.
        ++StridedLoads;
        // We've seen enough strided loads that seeing more won't make a
        // difference.
        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;
      }
    }
    return StridedLoads;
  };

  int StridedLoads = countStridedLoads(L, SE);
  LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
                    << " strided loads\n");
  // Pick the largest power of 2 unroll count that won't result in too many
  // strided loads.
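  // For example, if two strided loads were detected, MaxCount becomes
  // 1 << Log2_32(7 / 2) = 2, so each load is duplicated at most once.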
  if (StridedLoads) {
    UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
    LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
                      << UP.MaxCount << '\n');
  }
}

void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                             TTI::UnrollingPreferences &UP) {
  // Enable partial unrolling and runtime unrolling.
  BaseT::getUnrollingPreferences(L, SE, UP);

  // An inner loop is more likely to be hot, and its runtime check can often be
  // promoted out of the loop by LICM, so the overhead is smaller; use a larger
  // threshold to unroll more of them.
  if (L->getLoopDepth() > 1)
    UP.PartialThreshold *= 2;

  // Disable partial & runtime unrolling on -Os.
  UP.PartialOptSizeThreshold = 0;

  if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
      EnableFalkorHWPFUnrollFix)
    getFalkorUnrollingPreferences(L, SE, UP);
}

Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                         Type *ExpectedType) {
  switch (Inst->getIntrinsicID()) {
  default:
    return nullptr;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    // Create a struct type
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    if (!ST)
      return nullptr;
    unsigned NumElts = Inst->getNumArgOperands() - 1;
    if (ST->getNumElements() != NumElts)
      return nullptr;
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
        return nullptr;
    }
    Value *Res = UndefValue::get(ExpectedType);
    IRBuilder<> Builder(Inst);
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      Value *L = Inst->getArgOperand(i);
      Res = Builder.CreateInsertValue(Res, L, i);
    }
    return Res;
  }
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
      return Inst;
    return nullptr;
  }
}

bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                        MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
    Info.PtrVal = Inst->getArgOperand(0);
    break;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;
    Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
    break;
  }

  switch (Inst->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    break;
  }
  return true;
}

/// See if \p I should be considered for address type promotion. We check if \p
/// I is a sext with the right type and used in memory accesses. If it is used
/// in a "complex" getelementptr, we allow it to be promoted without finding
/// other sext instructions that sign extended the same initial value. A
/// getelementptr is considered "complex" if it has more than 2 operands.
bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))
    return false;
  Type *ConsideredSExtType =
      Type::getInt64Ty(I.getParent()->getParent()->getContext());
  if (I.getType() != ConsideredSExtType)
    return false;
  // See if the sext is the one with the right type and used in at least one
  // GetElementPtrInst.
  for (const User *U : I.users()) {
    if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
      Considerable = true;
      // A getelementptr is considered as "complex" if it has more than 2
      // operands. We will promote a SExt used in such complex GEP as we
      // expect some computation to be merged if they are done on 64 bits.
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;
        break;
      }
    }
  }
  return Considerable;
}

unsigned AArch64TTIImpl::getCacheLineSize() {
  return ST->getCacheLineSize();
}

unsigned AArch64TTIImpl::getPrefetchDistance() {
  return ST->getPrefetchDistance();
}

unsigned AArch64TTIImpl::getMinPrefetchStride() {
  return ST->getMinPrefetchStride();
}

unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
  return ST->getMaxPrefetchIterationsAhead();
}

bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                           TTI::ReductionFlags Flags) const {
  assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
  unsigned ScalarBits = Ty->getScalarSizeInBits();
  switch (Opcode) {
  case Instruction::FAdd:
  case Instruction::FMul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Mul:
    return false;
  case Instruction::Add:
    return ScalarBits * Ty->getVectorNumElements() >= 128;
  case Instruction::ICmp:
    return (ScalarBits < 64) &&
           (ScalarBits * Ty->getVectorNumElements() >= 128);
  case Instruction::FCmp:
    return Flags.NoNaN;
  default:
    llvm_unreachable("Unhandled reduction opcode");
  }
  return false;
}

int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
                                               bool IsPairwiseForm) {

  if (IsPairwiseForm)
    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
  MVT MTy = LT.second;
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Horizontal adds can use the 'addv' instruction. We model the cost of these
  // instructions as normal vector adds. This is the only arithmetic vector
  // reduction operation for which we have an instruction.
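  // For example, an add reduction of <4 x i32> maps to a single
  // "addv s0, v0.4s".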
  static const CostTblEntry CostTblNoPairwise[]{
      {ISD::ADD, MVT::v8i8,  1},
      {ISD::ADD, MVT::v16i8, 1},
      {ISD::ADD, MVT::v4i16, 1},
      {ISD::ADD, MVT::v8i16, 1},
      {ISD::ADD, MVT::v4i32, 1},
  };

  if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
    return LT.first * Entry->Cost;

  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
}

int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                   Type *SubTp) {
  if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
      Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
    static const CostTblEntry ShuffleTbl[] = {
      // Broadcast shuffle kinds can be performed with 'dup'.
      // Transpose shuffle kinds can be performed with 'trn1/trn2' and
      // 'zip1/zip2' instructions.
      // Select shuffle kinds.
      // TODO: handle vXi8/vXi16.
      { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
      { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
      { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
      { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
      { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
      { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
      // PermuteSingleSrc shuffle kinds.
      // TODO: handle vXi8/vXi16.
      { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
      { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
      { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
      { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
      { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
      { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
    };
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
    if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
      return LT.first * Entry->Cost;
  }

  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}