//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetTransformInfo.h"
#include "AArch64ExpandImm.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "aarch64tti"

static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
                                               cl::init(true), cl::Hidden);

bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
                                         const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // Inline a callee if its target-features are a subset of the caller's
  // target-features.
  return (CallerBits & CalleeBits) == CalleeBits;
}
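
// For example (illustrative): a caller compiled with "+neon,+fullfp16" may
// inline a callee compiled with "+neon", since the callee's feature bits are
// a subset of the caller's, but not the other way around.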

/// Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
int AArch64TTIImpl::getIntImmCost(int64_t Val) {
  // Check if the immediate can be encoded within an instruction.
  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
    return 0;

  if (Val < 0)
    Val = ~Val;

  // Calculate how many moves we will need to materialize this constant.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Val, 64, Insn);
  return Insn.size();
}
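
// For example (illustrative): 0x00FF00FF00FF00FF is a valid logical
// immediate, so it costs 0 here, while 0x12345678 needs a MOVZ plus one MOVK
// and costs 2.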

/// Calculate the cost of materializing the given constant.
int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                  TTI::TargetCostKind CostKind) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  // Sign-extend all constants to a multiple of 64 bits.
  APInt ImmVal = Imm;
  if (BitSize & 0x3f)
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  // Split the constant into 64-bit chunks and calculate the cost for each
  // chunk.
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
    int64_t Val = Tmp.getSExtValue();
    Cost += getIntImmCost(Val);
  }
  // We need at least one instruction to materialize the constant.
  return std::max(1, Cost);
}
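
// For example (illustrative): an i128 constant is sign-extended and split
// into two 64-bit chunks, and the per-chunk materialization costs are summed,
// with a floor of one instruction.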

int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind,
                                      Instruction *Inst) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::Store:
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    ImmIdx = 1;
    break;
  // Always return TCC_Free for the shift value of a shift instruction.
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
      return TTI::TCC_Free;
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

  if (Idx == ImmIdx) {
    int NumConstants = (BitSize + 63) / 64;
    int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
    return (Cost <= NumConstants * TTI::TCC_Basic)
               ? static_cast<int>(TTI::TCC_Free)
               : Cost;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
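
// For example (illustrative): in 'add i64 %x, 42' the immediate is operand 1
// (ImmIdx) and materializes with a single MOVZ, so its cost does not exceed
// NumConstants * TCC_Basic and TCC_Free is returned, leaving the constant in
// place for instruction selection to fold into the add.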

int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                        const APInt &Imm, Type *Ty,
                                        TTI::TargetCostKind CostKind) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return TCC_Free
  // here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  // Most (all?) AArch64 intrinsics do not support folding immediates into the
  // selected instruction, so we compute the materialization cost for the
  // immediate directly.
  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
    return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      int NumConstants = (BitSize + 63) / 64;
      int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
      return (Cost <= NumConstants * TTI::TCC_Basic)
                 ? static_cast<int>(TTI::TCC_Free)
                 : Cost;
    }
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
}

TargetTransformInfo::PopcntSupportKind
AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (TyWidth == 32 || TyWidth == 64)
    return TTI::PSK_FastHardware;
  // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
  return TTI::PSK_Software;
}
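
// Informal note: 32- and 64-bit ctpop lowers to a short NEON sequence (fmov
// to a vector register, cnt over the bytes, addv to sum them), which is why
// those widths report fast hardware support.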

InstructionCost
AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                      TTI::TargetCostKind CostKind) {
  auto *RetTy = ICA.getReturnType();
  switch (ICA.getID()) {
  case Intrinsic::umin:
  case Intrinsic::umax: {
    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
    // umin(x,y) -> sub(x,usubsat(x,y))
    // umax(x,y) -> add(x,usubsat(y,x))
    if (LT.second == MVT::v2i64)
      return LT.first * 2;
    LLVM_FALLTHROUGH;
  }
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                        MVT::v8i16, MVT::v2i32, MVT::v4i32};
    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
      return LT.first;
    break;
  }
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
                                     MVT::v2i64};
    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
    // This is a base cost of 1 for the vadd, plus 3 extract shifts if we
    // need to extend the type, as it uses shr(qadd(shl, shl)).
    unsigned Instrs =
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;
    break;
  }
  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
                                     MVT::v2i64};
    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))
      return LT.first;
    break;
  }
  case Intrinsic::experimental_stepvector: {
    unsigned Cost = 1; // Cost of the `index' instruction
    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
    // Legalisation of illegal vectors involves an `index' instruction plus
    // (LT.first - 1) vector adds.
    if (LT.first > 1) {
      Type *LegalVTy = EVT(LT.second).getTypeForEVT(RetTy->getContext());
      unsigned AddCost =
          getArithmeticInstrCost(Instruction::Add, LegalVTy, CostKind);
      Cost += AddCost * (LT.first - 1);
    }
    return Cost;
  }
  default:
    break;
  }
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                           ArrayRef<const Value *> Args) {

  // A helper that returns a vector type from the given type. The number of
  // elements in type Ty determines the vector width.
  auto toVectorTy = [&](Type *ArgTy) {
    return VectorType::get(ArgTy->getScalarType(),
                           cast<VectorType>(DstTy)->getElementCount());
  };

  // Exit early if DstTy is not a vector type whose elements are at least
  // 16 bits wide.
  if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
    return false;

  // Determine if the operation has a widening variant. We consider both the
  // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
  // instructions.
  //
  // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
  // verify that their extending operands are eliminated during code
  // generation.
  switch (Opcode) {
  case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
  case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
    break;
  default:
    return false;
  }

  // To be a widening instruction (either the "wide" or "long" version), the
  // second operand must be a sign- or zero-extend with a single user. We
  // only consider extends having a single user because they may otherwise not
  // be eliminated.
  if (Args.size() != 2 ||
      (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
      !Args[1]->hasOneUse())
    return false;
  auto *Extend = cast<CastInst>(Args[1]);

  // Legalize the destination type and ensure it can be used in a widening
  // operation.
  auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
  unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
  if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
    return false;

  // Legalize the source type and ensure it can be used in a widening
  // operation.
  auto *SrcTy = toVectorTy(Extend->getSrcTy());
  auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
  if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
    return false;

  // Get the total number of vector elements in the legalized types.
  unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorMinNumElements();
  unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

  // Return true if the legalized types have the same number of vector elements
  // and the destination element type size is twice that of the source type.
  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
}
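
// Illustrative IR for the widening check (hypothetical values):
//   %e = zext <8 x i8> %b to <8 x i16>   ; single-use extend as operand 1
//   %r = add <8 x i16> %a, %e
// Here the add can be selected as 'uaddw v0.8h, v1.8h, v2.8b', so the extend
// is folded away and the add qualifies as a widening instruction.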

InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                                 Type *Src,
                                                 TTI::CastContextHint CCH,
                                                 TTI::TargetCostKind CostKind,
                                                 const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // If the cast is observable, and it is used by a widening instruction (e.g.,
  // uaddl, saddw, etc.), it may be free.
  if (I && I->hasOneUse()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
      // If the cast is the second operand, it is free. We will generate either
      // a "wide" or "long" version of the widening instruction.
      if (I == SingleUser->getOperand(1))
        return 0;
      // If the cast is not the second operand, it will be free if it looks the
      // same as the second operand. In this case, we will generate a "long"
      // version of the widening instruction.
      if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
        if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
            cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
          return 0;
    }
  }

  // TODO: Allow non-throughput costs that aren't binary.
  auto AdjustCost = [&CostKind](InstructionCost Cost) -> InstructionCost {
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost == 0 ? 0 : 1;
    return Cost;
  };

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return AdjustCost(
        BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));

  static const TypeConversionCostTblEntry
  ConversionTbl[] = {

    // Truncations on nxvmiN

    // The number of shll instructions for the extension.

    // LowerVectorINT_TO_FP:

    // Complex: to v2f32

    // Complex: to v4f32

    // Complex: to v8f32

    // Complex: to v16f32

    // Complex: to v2f64

    // LowerVectorFP_TO_INT

    // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).

    // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2

    // Lowering scalable

    // Complex, from nxv2f32: legal type is nxv2i32 (no cost) or nxv2i64 (1 ext)

    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.

    // Complex, from nxv2f64: legal type is nxv2i32, 1 narrowing => ~2.

    // Complex, from nxv4f32: legal type is nxv4i16, 1 narrowing => ~2

    // Complex, from nxv8f64: legal type is nxv8i32, 1 narrowing => ~2.

    // Complex, from nxv4f64: legal type is nxv4i32, 1 narrowing => ~2.

    // Complex, from nxv8f32: legal type is nxv8i32 (no cost) or nxv8i64 (1 ext).

    // Truncate from nxvmf32 to nxvmf16.

    // Truncate from nxvmf64 to nxvmf16.

    // Truncate from nxvmf64 to nxvmf32.

    // Extend from nxvmf16 to nxvmf32.

    // Extend from nxvmf16 to nxvmf64.

    // Extend from nxvmf32 to nxvmf64.
  };

  if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
                                                 DstTy.getSimpleVT(),
                                                 SrcTy.getSimpleVT()))
    return AdjustCost(Entry->Cost);

  return AdjustCost(
      BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
}
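
// For example (illustrative): a sext from <8 x i8> to <8 x i16> is a single
// sshll, which is the kind of shll-count the extension entries of the table
// are meant to model.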

InstructionCost AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode,
                                                         Type *Dst,
                                                         VectorType *VecTy,
                                                         unsigned Index) {

  // Make sure we were given a valid extend opcode.
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
         "Invalid opcode");

  // We are extending an element we extract from a vector, so the source type
  // of the extend is the element type of the vector.
  auto *Src = VecTy->getElementType();

  // Sign- and zero-extends are for integer types only.
  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");

  // Get the cost for the extract. We compute the cost (if any) for the extend
  // below.
  InstructionCost Cost =
      getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);

  // Legalize the types.
  auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;

  // If the resulting type is still a vector and the destination type is legal,
  // we may get the extension for free. If not, get the default cost for the
  // extend.
  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
    return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
                                   CostKind);

  // The destination type should be larger than the element type. If not, get
  // the default cost for the extend.
  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
    return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
                                   CostKind);

  switch (Opcode) {
  default:
    llvm_unreachable("Opcode should be either SExt or ZExt");

  // For sign-extends, we only need a smov, which performs the extension
  // automatically.
  case Instruction::SExt:
    return Cost;

  // For zero-extends, the extend is performed automatically by a umov unless
  // the destination type is i64 and the element type is i8 or i16.
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
      return Cost;
  }

  // If we are unable to perform the extend for free, get the default cost.
  return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
                                 CostKind);
}
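
// For example (illustrative): 'smov x0, v0.h[1]' extracts lane 1 with the
// sign-extension built in, and 'umov w0, v0.h[1]' performs a zero-extending
// extract, so the extend usually adds nothing beyond the extract cost.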

unsigned AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
                                        TTI::TargetCostKind CostKind,
                                        const Instruction *I) {
  if (CostKind == TTI::TCK_CodeSize)
    return Opcode == Instruction::PHI ? 0 : 1;
  assert(CostKind == TTI::TCK_RecipThroughput && "unexpected CostKind");
  // Branches are assumed to be predicted.
  return 0;
}

int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                       unsigned Index) {
  assert(Val->isVectorTy() && "This must be a vector type");

  if (Index != -1U) {
    // Legalize the type.
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);

    // This type is legalized to a scalar type.
    if (!LT.second.isVector())
      return 0;

    // The type may be split. Normalize the index to the new type.
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

    // The element at index zero is already inside the vector.
    if (Index == 0)
      return 0;
  }

  // All other insert/extracts cost this much.
  return ST->getVectorInsertExtractBaseCost();
}

InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
    const Instruction *CxtI) {
  // TODO: Handle more cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
                                         Opd2Info, Opd1PropInfo,
                                         Opd2PropInfo, Args, CxtI);

  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
  // add in the widening overhead specified by the sub-target. Since the
  // extends feeding widening instructions are performed automatically, they
  // aren't present in the generated code and have a zero cost. By adding a
  // widening overhead here, we attach the total cost of the combined operation
  // to the widening instruction.
  int Cost = 0;
  if (isWideningInstruction(Ty, Opcode, Args))
    Cost += ST->getWideningBaseCost();

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  switch (ISD) {
  default:
    return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
                                                Opd2Info,
                                                Opd1PropInfo, Opd2PropInfo);
  case ISD::SDIV:
    if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
        Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
      // On AArch64, scalar signed division by a power-of-two constant is
      // normally expanded to the sequence ADD + CMP + SELECT + SRA.
      // The OperandValue properties may not be the same as those of the
      // previous operation; conservatively assume OP_None.
      Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
                                     Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind,
                                     Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::Select, Ty, CostKind,
                                     Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
                                     Opd1Info, Opd2Info,
                                     TargetTransformInfo::OP_None,
                                     TargetTransformInfo::OP_None);
      return Cost;
    }
    LLVM_FALLTHROUGH;
  case ISD::UDIV:
    if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
      auto VT = TLI->getValueType(DL, Ty);
      if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
        // Vector signed division by a constant is expanded to the
        // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
        // to MULHU + SUB + SRL + ADD + SRL.
        int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
                                             Opd1Info, Opd2Info,
                                             TargetTransformInfo::OP_None,
                                             TargetTransformInfo::OP_None);
        int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
                                             Opd1Info, Opd2Info,
                                             TargetTransformInfo::OP_None,
                                             TargetTransformInfo::OP_None);
        int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
                                             Opd1Info, Opd2Info,
                                             TargetTransformInfo::OP_None,
                                             TargetTransformInfo::OP_None);
        return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
      }
    }

    Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
                                          Opd2Info,
                                          Opd1PropInfo, Opd2PropInfo);
    if (Ty->isVectorTy()) {
      // On AArch64, vector divisions are not supported natively and are
      // expanded into scalar divisions of each pair of elements.
      Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, CostKind,
                                     Opd1Info, Opd2Info, Opd1PropInfo,
                                     Opd2PropInfo);
      Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind,
                                     Opd1Info, Opd2Info, Opd1PropInfo,
                                     Opd2PropInfo);
      // TODO: if one of the arguments is scalar, then it's not necessary to
      // double the cost of handling the vector elements.
      Cost += Cost;
    }
    return Cost;

  case ISD::MUL:
    if (LT.second != MVT::v2i64)
      return (Cost + 1) * LT.first;
    // Since we do not have a MUL.2d instruction, a mul <2 x i64> is expensive
    // as elements are extracted from the vectors and the muls scalarized.
    // As getScalarizationOverhead is a bit too pessimistic, we estimate the
    // cost for an i64 vector directly here, which is:
    // - four i64 extracts,
    // - two i64 inserts, and
    // - two muls.
    // So, for a v2i64 with LT.first = 1 the cost is 8, and for a v4i64 with
    // LT.first = 2 the cost is 16.
    return LT.first * 8;
  case ISD::ADD:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // These nodes are marked as 'custom' for combining purposes only.
    // We know that they are legal. See LowerAdd in ISelLowering.
    return (Cost + 1) * LT.first;

  case ISD::FADD:
    // These nodes are marked as 'custom' just to lower them to SVE.
    // We know said lowering will incur no additional cost.
    if (isa<FixedVectorType>(Ty) && !Ty->getScalarType()->isFP128Ty())
      return (Cost + 2) * LT.first;

    return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
                                                Opd2Info,
                                                Opd1PropInfo, Opd2PropInfo);
  }
}
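
// For example (illustrative): 'sdiv i32 %x, 4' with a uniform power-of-two
// divisor is costed as the ADD + CMP + SELECT + SRA expansion modeled above
// rather than as a hardware sdiv.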

int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                              const SCEV *Ptr) {
  // Address computations in vectorized code with non-consecutive addresses will
  // likely result in more instructions compared to scalar code where the
  // computation can more often be merged into the index mode. The resulting
  // extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (Ty->isVectorTy() && SE &&
      !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}

InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                                   Type *CondTy,
                                                   CmpInst::Predicate VecPred,
                                                   TTI::TargetCostKind CostKind,
                                                   const Instruction *I) {
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                     I);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // We don't lower some vector selects well when they are wider than the
  // register width.
  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
    // We would need this many instructions to hide the scalarization happening.
    const int AmortizationCost = 20;

    // If VecPred is not set, check if we can get a predicate from the context
    // instruction, if its type matches the requested ValTy.
    if (VecPred == CmpInst::BAD_ICMP_PREDICATE && I && I->getType() == ValTy) {
      CmpInst::Predicate CurrentPred;
      if (match(I, m_Select(m_Cmp(CurrentPred, m_Value(), m_Value()), m_Value(),
                            m_Value())))
        VecPred = CurrentPred;
    }
    // Check if we have a compare/select chain that can be lowered using a
    // CMxx & BFI pair.
    if (CmpInst::isIntPredicate(VecPred)) {
      static const auto ValidMinMaxTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                          MVT::v8i16, MVT::v2i32, MVT::v4i32,
                                          MVT::v2i64};
      auto LT = TLI->getTypeLegalizationCost(DL, ValTy);
      if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
        return LT.first;
    }

    static const TypeConversionCostTblEntry
    VectorSelectTbl[] = {
      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
      { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }
  }
  // The base case handles scalable vectors fine for now, since it treats the
  // cost as 1 * legalization cost.
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}

TTI::MemCmpExpansionOptions
AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  TTI::MemCmpExpansionOptions Options;
  if (ST->requiresStrictAlign()) {
    // TODO: Add cost modeling for strict align. Misaligned loads expand to
    // a bunch of instructions when strict align is enabled.
    return Options;
  }
  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  Options.NumLoadsPerBlock = Options.MaxNumLoads;
  // TODO: Though vector loads usually perform well on AArch64, in some targets
  // they may wake up the FP unit, which raises the power consumption. Perhaps
  // they could be used with no holds barred (-O3).
  Options.LoadSizes = {8, 4, 2, 1};
  return Options;
}
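
// For example (illustrative): with overlapping loads allowed, a 15-byte
// memcmp can be expanded as an 8-byte load at offset 0 plus an overlapping
// 8-byte load at offset 7 from each buffer, avoiding the libcall.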

InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {

  if (!isa<ScalableVectorType>(DataTy))
    return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                         Alignment, CostKind, I);
  auto *VT = cast<VectorType>(DataTy);
  auto LT = TLI->getTypeLegalizationCost(DL, DataTy);
  ElementCount LegalVF = LT.second.getVectorElementCount();
  Optional<unsigned> MaxNumVScale = getMaxVScale();
  assert(MaxNumVScale && "Expected valid max vscale value");

  InstructionCost MemOpCost =
      getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
  unsigned MaxNumElementsPerGather =
      MaxNumVScale.getValue() * LegalVF.getKnownMinValue();
  return LT.first * MaxNumElementsPerGather * MemOpCost;
}
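
// For example (illustrative): a gather of nxv4i32, assuming the architectural
// maximum vscale of 16, is costed as up to 4 * 16 elements times the scalar
// memory-op cost; i.e., SVE gathers are modeled here as one load per element.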

bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
  return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();
}

InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
                                                MaybeAlign Alignment,
                                                unsigned AddressSpace,
                                                TTI::TargetCostKind CostKind,
                                                const Instruction *I) {
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return 1;

  // Type legalization can't handle structs
  if (TLI->getValueType(DL, Ty, true) == MVT::Other)
    return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
                                  CostKind);

  auto LT = TLI->getTypeLegalizationCost(DL, Ty);

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
    // Unaligned stores are extremely inefficient. We don't split all
    // unaligned 128-bit stores because of the negative impact that has been
    // observed in practice on inlined block copy code.
    // We make such stores expensive so that we will only vectorize if there
    // are 6 other instructions getting vectorized.
    const int AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;
  }

  if (useNeonVector(Ty) &&
      cast<VectorType>(Ty)->getElementType()->isIntegerTy(8)) {
    unsigned ProfitableNumElements;
    if (Opcode == Instruction::Store)
      // We use a custom trunc store lowering so v.4b should be profitable.
      ProfitableNumElements = 4;
    else
      // We scalarize the loads because there is no v.4b register and we
      // have to promote the elements to v.2.
      ProfitableNumElements = 8;

    if (cast<FixedVectorType>(Ty)->getNumElements() < ProfitableNumElements) {
      unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
      unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
      // We generate 2 instructions per vector element.
      return NumVectorizableInstsToAmortize * NumVecElts * 2;
    }
  }

  return LT.first;
}
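
// For example (illustrative): on a subtarget where misaligned 128-bit stores
// are slow, an under-aligned store of <4 x i32> is costed at
// LT.first * 2 * 6 == 12 rather than 1, strongly penalizing vectorization
// around it.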

InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  auto *VecVTy = cast<FixedVectorType>(VecTy);

  if (!UseMaskForCond && !UseMaskForGaps &&
      Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned NumElts = VecVTy->getNumElements();
    auto *SubVecTy =
        FixedVectorType::get(VecTy->getScalarType(), NumElts / Factor);

    // ldN/stN only support legal vector types of size 64 or 128 in bits.
    // Accesses having vector types that are a multiple of 128 bits can be
    // matched to more than one ldN/stN instruction.
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
}
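
// For example (illustrative): de-interleaving a <8 x i16> access with
// Factor == 2 maps to one ld2 over two 64-bit subvectors, giving a cost of
// Factor * getNumInterleavedAccesses(<4 x i16>) == 2 * 1 == 2.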

int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
  InstructionCost Cost = 0;
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
  for (auto *I : Tys) {
    if (!I->isVectorTy())
      continue;
    if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
        128)
      Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) +
              getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind);
  }
  return *Cost.getValue();
}

unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
  return ST->getMaxInterleaveFactor();
}

// For Falkor, we want to avoid having too many strided loads in a loop since
// that can exhaust the HW prefetcher resources. We adjust the unroller
// MaxCount preference below to attempt to ensure unrolling doesn't create too
// many strided loads.
static void
getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                              TargetTransformInfo::UnrollingPreferences &UP) {
  enum { MaxStridedLoads = 7 };
  auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
    int StridedLoads = 0;
    // FIXME? We could make this more precise by looking at the CFG and
    // e.g. not counting loads in each side of an if-then-else diamond.
    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        LoadInst *LMemI = dyn_cast<LoadInst>(&I);
        if (!LMemI)
          continue;

        Value *PtrValue = LMemI->getPointerOperand();
        if (L->isLoopInvariant(PtrValue))
          continue;

        const SCEV *LSCEV = SE.getSCEV(PtrValue);
        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
          continue;

        // FIXME? We could take pairing of unrolled load copies into account
        // by looking at the AddRec, but we would probably have to limit this
        // to loops with no stores or other memory optimization barriers.
        ++StridedLoads;
        // We've seen enough strided loads that seeing more won't make a
        // difference.
        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;
      }
    }
    return StridedLoads;
  };

  int StridedLoads = countStridedLoads(L, SE);
  LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
                    << " strided loads\n");
  // Pick the largest power of 2 unroll count that won't result in too many
  // strided loads.
  if (StridedLoads) {
    UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
    LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
                      << UP.MaxCount << '\n');
  }
}
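
// For example (illustrative): with MaxStridedLoads == 7, a loop containing
// three strided loads gets UP.MaxCount = 1 << Log2_32(7 / 3) == 2, keeping
// the unrolled loop's strided-load count within the prefetcher budget.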

void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                             TTI::UnrollingPreferences &UP) {
  // Enable partial unrolling and runtime unrolling.
  BaseT::getUnrollingPreferences(L, SE, UP);

  // An inner loop is more likely to be hot, and its runtime check can be
  // promoted out by the LICM pass, so the overhead is smaller; try a larger
  // threshold to unroll more loops.
  if (L->getLoopDepth() > 1)
    UP.PartialThreshold *= 2;

  // Disable partial & runtime unrolling on -Os.
  UP.PartialOptSizeThreshold = 0;

  if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
      EnableFalkorHWPFUnrollFix)
    getFalkorUnrollingPreferences(L, SE, UP);
}

void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}

Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                         Type *ExpectedType) {
  switch (Inst->getIntrinsicID()) {
  default:
    return nullptr;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    // Create a struct type
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    if (!ST)
      return nullptr;
    unsigned NumElts = Inst->getNumArgOperands() - 1;
    if (ST->getNumElements() != NumElts)
      return nullptr;
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
        return nullptr;
    }
    Value *Res = UndefValue::get(ExpectedType);
    IRBuilder<> Builder(Inst);
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      Value *L = Inst->getArgOperand(i);
      Res = Builder.CreateInsertValue(Res, L, i);
    }
    return Res;
  }
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
      return Inst;
    return nullptr;
  }
}

bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                        MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
    Info.PtrVal = Inst->getArgOperand(0);
    break;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;
    Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
    break;
  }

  switch (Inst->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
    break;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    break;
  }
  return true;
}
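
// Informal note: the MatchingId values above let passes such as EarlyCSE
// recognize that an aarch64.neon.st2 followed by an aarch64.neon.ld2 of the
// same pointer touch the same memory, so the load can be replaced by the
// stored values via getOrCreateResultFromMemIntrinsic above.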

/// See if \p I should be considered for address type promotion. We check if
/// \p I is a sext with the right type that is used in memory accesses. If it
/// is used in a "complex" getelementptr, we allow it to be promoted without
/// finding other sext instructions that sign extended the same initial value.
/// A getelementptr is considered "complex" if it has more than 2 operands.
bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))
    return false;
  Type *ConsideredSExtType =
      Type::getInt64Ty(I.getParent()->getParent()->getContext());
  if (I.getType() != ConsideredSExtType)
    return false;
  // See if the sext is the one with the right type and used in at least one
  // GetElementPtrInst.
  for (const User *U : I.users()) {
    if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
      Considerable = true;
      // A getelementptr is considered "complex" if it has more than 2
      // operands. We will promote a SExt used in such complex GEPs as we
      // expect some computation to be merged if they are done on 64 bits.
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;
        break;
      }
    }
  }
  return Considerable;
}
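
// For example (illustrative):
//   %idx = sext i32 %i to i64
//   %p = getelementptr [256 x i32], [256 x i32]* %a, i64 0, i64 %idx
// The GEP has three operands, so it is "complex" and the sext may be promoted
// without finding a common header with other sexts of the same value.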

bool AArch64TTIImpl::isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc,
                                                 ElementCount VF) const {
  if (!VF.isScalable())
    return true;

  Type *Ty = RdxDesc.getRecurrenceType();
  if (Ty->isBFloatTy() || !isLegalElementTypeForSVE(Ty))
    return false;

  switch (RdxDesc.getRecurrenceKind()) {
  case RecurKind::Add:
  case RecurKind::FAdd:
  case RecurKind::And:
  case RecurKind::Or:
  case RecurKind::Xor:
  case RecurKind::SMin:
  case RecurKind::SMax:
  case RecurKind::UMin:
  case RecurKind::UMax:
  case RecurKind::FMin:
  case RecurKind::FMax:
    return true;
  default:
    return false;
  }
}

InstructionCost
AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                       bool IsPairwise, bool IsUnsigned,
                                       TTI::TargetCostKind CostKind) {
  if (!isa<ScalableVectorType>(Ty))
    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
                                         CostKind);
  assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) &&
         "Both vectors need to be scalable");

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  InstructionCost LegalizationCost = 0;
  if (LT.first > 1) {
    Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
    unsigned CmpOpcode =
        Ty->isFPOrFPVectorTy() ? Instruction::FCmp : Instruction::ICmp;
    LegalizationCost =
        getCmpSelInstrCost(CmpOpcode, LegalVTy, LegalVTy,
                           CmpInst::BAD_ICMP_PREDICATE, CostKind) +
        getCmpSelInstrCost(Instruction::Select, LegalVTy, LegalVTy,
                           CmpInst::BAD_ICMP_PREDICATE, CostKind);
    LegalizationCost *= LT.first - 1;
  }

  return LegalizationCost + /*Cost of horizontal reduction*/ 2;
}

InstructionCost AArch64TTIImpl::getArithmeticReductionCostSVE(
    unsigned Opcode, VectorType *ValTy, bool IsPairwise,
    TTI::TargetCostKind CostKind) {
  assert(!IsPairwise && "Cannot be pairwise to continue");

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
  int LegalizationCost = 0;
  if (LT.first > 1) {
    Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext());
    LegalizationCost = getArithmeticInstrCost(Opcode, LegalVTy, CostKind);
    LegalizationCost *= LT.first - 1;
  }

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");
  // Add the final reduction cost for the legal horizontal reduction.
  switch (ISD) {
  case ISD::ADD:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
  case ISD::FADD:
    return LegalizationCost + 2;
  default:
    return InstructionCost::getInvalid();
  }
}

InstructionCost
AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                           bool IsPairwiseForm,
                                           TTI::TargetCostKind CostKind) {

  if (isa<ScalableVectorType>(ValTy))
    return getArithmeticReductionCostSVE(Opcode, ValTy, IsPairwiseForm,
                                         CostKind);
  if (IsPairwiseForm)
    return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
                                             CostKind);

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
  MVT MTy = LT.second;
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Horizontal adds can use the 'addv' instruction. We model the cost of these
  // instructions as normal vector adds. This is the only arithmetic vector
  // reduction operation for which we have an instruction.
  static const CostTblEntry CostTblNoPairwise[]{
      {ISD::ADD, MVT::v8i8,  1},
      {ISD::ADD, MVT::v16i8, 1},
      {ISD::ADD, MVT::v4i16, 1},
      {ISD::ADD, MVT::v8i16, 1},
      {ISD::ADD, MVT::v4i32, 1},
  };

  if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
    return LT.first * Entry->Cost;

  return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
                                           CostKind);
}
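
// For example (illustrative): a v8i16 add reduction is a single
// 'addv h0, v0.8h', matching the unit cost in the table above.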

InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                               VectorType *Tp,
                                               ArrayRef<int> Mask, int Index,
                                               VectorType *SubTp) {
  if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
      Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||
      Kind == TTI::SK_Reverse) {
    static const CostTblEntry ShuffleTbl[] = {
      // Broadcast shuffle kinds can be performed with 'dup'.
      { TTI::SK_Broadcast, MVT::v8i8,  1 },
      { TTI::SK_Broadcast, MVT::v16i8, 1 },
      { TTI::SK_Broadcast, MVT::v4i16, 1 },
      { TTI::SK_Broadcast, MVT::v8i16, 1 },
      { TTI::SK_Broadcast, MVT::v2i32, 1 },
      { TTI::SK_Broadcast, MVT::v4i32, 1 },
      { TTI::SK_Broadcast, MVT::v2i64, 1 },
      { TTI::SK_Broadcast, MVT::v2f32, 1 },
      { TTI::SK_Broadcast, MVT::v4f32, 1 },
      { TTI::SK_Broadcast, MVT::v2f64, 1 },
      // Transpose shuffle kinds can be performed with 'trn1/trn2' and
      // 'zip1/zip2' instructions.
      { TTI::SK_Transpose, MVT::v8i8,  1 },
      { TTI::SK_Transpose, MVT::v16i8, 1 },
      { TTI::SK_Transpose, MVT::v4i16, 1 },
      { TTI::SK_Transpose, MVT::v8i16, 1 },
      { TTI::SK_Transpose, MVT::v2i32, 1 },
      { TTI::SK_Transpose, MVT::v4i32, 1 },
      { TTI::SK_Transpose, MVT::v2i64, 1 },
      { TTI::SK_Transpose, MVT::v2f32, 1 },
      { TTI::SK_Transpose, MVT::v4f32, 1 },
      { TTI::SK_Transpose, MVT::v2f64, 1 },
      // Select shuffle kinds.
      // TODO: handle vXi8/vXi16.
      { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
      { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
      { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
      { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
      { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
      { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
      // PermuteSingleSrc shuffle kinds.
      // TODO: handle vXi8/vXi16.
      { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
      { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
      { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
      { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
      { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
      { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
      // Broadcast shuffle kinds for scalable vectors
      { TTI::SK_Broadcast, MVT::nxv16i8,  1 },
      { TTI::SK_Broadcast, MVT::nxv8i16,  1 },
      { TTI::SK_Broadcast, MVT::nxv4i32,  1 },
      { TTI::SK_Broadcast, MVT::nxv2i64,  1 },
      { TTI::SK_Broadcast, MVT::nxv8f16,  1 },
      { TTI::SK_Broadcast, MVT::nxv8bf16, 1 },
      { TTI::SK_Broadcast, MVT::nxv4f32,  1 },
      { TTI::SK_Broadcast, MVT::nxv2f64,  1 },
      // Handle the cases for vector.reverse with scalable vectors
      { TTI::SK_Reverse, MVT::nxv16i8,  1 },
      { TTI::SK_Reverse, MVT::nxv8i16,  1 },
      { TTI::SK_Reverse, MVT::nxv4i32,  1 },
      { TTI::SK_Reverse, MVT::nxv2i64,  1 },
      { TTI::SK_Reverse, MVT::nxv8f16,  1 },
      { TTI::SK_Reverse, MVT::nxv8bf16, 1 },
      { TTI::SK_Reverse, MVT::nxv4f32,  1 },
      { TTI::SK_Reverse, MVT::nxv2f64,  1 },
      { TTI::SK_Reverse, MVT::nxv16i1,  1 },
      { TTI::SK_Reverse, MVT::nxv8i1,   1 },
      { TTI::SK_Reverse, MVT::nxv4i1,   1 },
      { TTI::SK_Reverse, MVT::nxv2i1,   1 },
    };
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
    if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
      return LT.first * Entry->Cost;
  }

  return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
}
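
// For example (illustrative): broadcasting lane 0 of a v4i32 is a single
// 'dup v0.4s, v1.s[0]', and reversing an nxv4i32 is a single SVE 'rev'
// instruction, matching the unit costs in the table above.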
llvm::AArch64TTIImpl::getVectorInstrCost
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: AArch64TargetTransformInfo.cpp:668
i
i
Definition: README.txt:29
llvm::MVT::nxv4i1
@ nxv4i1
Definition: MachineValueType.h:175
llvm::InstructionCost
Definition: InstructionCost.h:26
llvm::EngineKind::Kind
Kind
Definition: ExecutionEngine.h:524
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:586
llvm::MVT::nxv4i64
@ nxv4i64
Definition: MachineValueType.h:205
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:452
llvm::TargetTransformInfo::SK_Select
@ SK_Select
Selects elements from the corresponding lane of either source operand.
Definition: TargetTransformInfo.h:848
llvm::MVT::nxv2i1
@ nxv2i1
Definition: MachineValueType.h:174
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:63
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:448
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
getFalkorUnrollingPreferences
static void getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE, TargetTransformInfo::UnrollingPreferences &UP)
Definition: AArch64TargetTransformInfo.cpp:1044
llvm::MVT::nxv2f64
@ nxv2f64
Definition: MachineValueType.h:232
llvm::SystemZISD::TM
@ TM
Definition: SystemZISelLowering.h:65
llvm::RecurKind::Or
@ Or
Bitwise or logical OR of integers.
llvm::InstructionCost::getValue
Optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Definition: InstructionCost.h:68
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:618
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:722
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:378
llvm::TargetTransformInfo::UnrollingPreferences::MaxCount
unsigned MaxCount
Definition: TargetTransformInfo.h:464
IntrinsicInst.h
llvm::ElementCount
Definition: TypeSize.h:386
llvm::AArch64TTIImpl::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: AArch64TargetTransformInfo.cpp:945
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:529
llvm::ISD::UDIV
@ UDIV
Definition: ISDOpcodes.h:236
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:586
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
llvm::MVT::nxv2f32
@ nxv2f32
Definition: MachineValueType.h:226
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:317
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1643
llvm::AArch64TTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Definition: AArch64TargetTransformInfo.cpp:1091
llvm::IRBuilder<>
llvm::TargetTransformInfo::MemCmpExpansionOptions::AllowOverlappingLoads
bool AllowOverlappingLoads
Definition: TargetTransformInfo.h:773
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:148
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::MVT::nxv2i64
@ nxv2i64
Definition: MachineValueType.h:204
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:770
llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:190
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::CostTblEntry
Cost Table Entry.
Definition: CostTable.h:24
llvm::MVT::nxv4f16
@ nxv4f16
Definition: MachineValueType.h:215
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1581
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:529
llvm::Optional< unsigned >
llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:40
llvm::MVT::nxv4f64
@ nxv4f64
Definition: MachineValueType.h:233
llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition: TargetTransformInfo.h:903
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:424
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::CallBase::getNumArgOperands
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1339
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:266
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:492
llvm::RecurKind::SMin
@ SMin
Signed integer min implemented in terms of select(cmp()).
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:158
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:856
llvm::MVT::nxv8i16
@ nxv8i16
Definition: MachineValueType.h:192
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:299
llvm::APIntOps::umin
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2183
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:846
llvm::AArch64TTIImpl::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: AArch64TargetTransformInfo.cpp:30
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:124
llvm::MVT::nxv8bf16
@ nxv8bf16
Definition: MachineValueType.h:223
llvm::AArch64TTIImpl::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
Definition: AArch64TargetTransformInfo.cpp:597
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:247
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
TargetLowering.h
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1423
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:664
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:714
llvm::AArch64TTIImpl::getCostOfKeepingLiveOverCall
int getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
Definition: AArch64TargetTransformInfo.cpp:1021
llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:903
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:845
llvm::AArch64_AM::isLogicalImmediate
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
Definition: AArch64AddressingModes.h:275
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1079
llvm::User
Definition: User.h:44
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
llvm::RecurrenceDescriptor::getRecurrenceType
Type * getRecurrenceType() const
Returns the type of the recurrence.
Definition: IVDescriptors.h:224
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:720
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:235
llvm::AArch64TTIImpl::getIntImmCostIntrin
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
Definition: AArch64TargetTransformInfo.cpp:155
llvm::MVT::v16i1
@ v16i1
Definition: MachineValueType.h:65
llvm::MVT::nxv2f16
@ nxv2f16
Definition: MachineValueType.h:214
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:119
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:597
llvm::MVT::v8i1
@ v8i1
Definition: MachineValueType.h:64
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:154
llvm::RecurrenceDescriptor::getRecurrenceKind
RecurKind getRecurrenceKind() const
Definition: IVDescriptors.h:180
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:724
llvm::AArch64TTIImpl::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: AArch64TargetTransformInfo.cpp:346
llvm::TypeConversionCostTblEntry
Type Conversion Cost Table.
Definition: CostTable.h:44
llvm::MVT::nxv4i8
@ nxv4i8
Definition: MachineValueType.h:183
llvm::MVT::nxv4f32
@ nxv4f32
Definition: MachineValueType.h:227
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1770
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:26
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:617
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
PatternMatch.h
llvm::AArch64TTIImpl::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsPairwise, bool IsUnsigned, TTI::TargetCostKind CostKind)
Definition: AArch64TargetTransformInfo.cpp:1251
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:650
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:154
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:86
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
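A minimal sketch, assuming an LLVMContext Ctx is in scope (EVT converts implicitly from MVT):
  EVT VT = MVT::v4i32;
  Type *Ty = VT.getTypeForEVT(Ctx); // the IR type <4 x i32>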
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:75
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:116
llvm::MVT::nxv4i16
@ nxv4i16
Definition: MachineValueType.h:191
llvm::RecurKind::UMin
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:586
LoopInfo.h
llvm::APInt::ashr
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition: APInt.h:963
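A small worked example of the sign-propagating shift (values are illustrative):
  APInt X(8, 0xF0);     // 1111'0000, i.e. -16 as a signed 8-bit value
  APInt Y = X.ashr(4);  // 1111'1111, i.e. -1: sign bits shift in from the left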
llvm::AArch64TTIImpl::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Definition: AArch64TargetTransformInfo.cpp:216
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:3885
AArch64AddressingModes.h
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:371
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:847
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:74
llvm::MVT::v4i64
@ v4i64
Definition: MachineValueType.h:110
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:391
llvm::MVT::nxv16i8
@ nxv16i8
Definition: MachineValueType.h:185
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::MVT::nxv8i64
@ nxv8i64
Definition: MachineValueType.h:206
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:77
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:88
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:109
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:771
llvm::CostTableLookup
const CostTblEntry * CostTableLookup(ArrayRef< CostTblEntry > Tbl, int ISD, MVT Ty)
Find in cost table; TypeTy must be comparable to CompareTy by ==.
Definition: CostTable.h:31
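A sketch of the usual lookup idiom; the table contents below are illustrative, not taken from this file:
  static const CostTblEntry MulTbl[] = {
    { ISD::MUL, MVT::v4i32, 1 },
    { ISD::MUL, MVT::v2i64, 4 },
  };
  if (const auto *Entry = CostTableLookup(MulTbl, ISD::MUL, MVT::v2i64))
    return Entry->Cost; // 4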
llvm::AArch64TTIImpl::getArithmeticReductionCostSVE
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, bool IsPairwiseForm, TTI::TargetCostKind CostKind)
Definition: AArch64TargetTransformInfo.cpp:1277
AArch64ExpandImm.h
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::MVT::v16f32
@ v16f32
Definition: MachineValueType.h:148
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:898
llvm::ConvertCostTableLookup
const TypeConversionCostTblEntry * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntry > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table; TypeTy must be comparable to CompareTy by ==.
Definition: CostTable.h:54
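The conversion-table variant keys on both destination and source types; the entry below is illustrative:
  static const TypeConversionCostTblEntry ExtTbl[] = {
    { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
  };
  if (const auto *Entry =
          ConvertCostTableLookup(ExtTbl, ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16))
    return Entry->Cost; // 1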
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:58
llvm::AArch64TTIImpl::isLegalToVectorizeReduction
bool isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc, ElementCount VF) const
Definition: AArch64TargetTransformInfo.cpp:1223
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:423
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:903
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:905
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::RecurKind::Add
@ Add
Sum of integers.
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:145
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:39
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
llvm::MVT::nxv4i32
@ nxv4i32
Definition: MachineValueType.h:198
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:755
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:30
llvm::LinearPolySize::getKnownMinValue
ScalarTy getKnownMinValue() const
Returns the minimum value this size can represent.
Definition: TypeSize.h:297
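A sketch of the distinction this makes for scalable sizes:
  ElementCount VF = ElementCount::getScalable(4); // <vscale x 4 x ...>
  unsigned Min = VF.getKnownMinValue();           // 4; the runtime count is 4 * vscale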
llvm::AArch64TTIImpl::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: AArch64TargetTransformInfo.cpp:1035
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:895
llvm::TargetTransformInfo::MemCmpExpansionOptions::MaxNumLoads
unsigned MaxNumLoads
Definition: TargetTransformInfo.h:755
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
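A minimal pattern-match sketch, assuming a Value *V is in scope:
  using namespace llvm::PatternMatch;
  Value *X, *Y;
  if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
    // V is an add; X and Y are now bound to its operands.
  }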
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::CmpInst::isIntPredicate
bool isIntPredicate() const
Definition: InstrTypes.h:817
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp; IsZeroCmp is true when the result is only compared against zero.
Definition: TargetTransformInfo.h:750
llvm::AArch64TTIImpl::getAddressComputationCost
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
Definition: AArch64TargetTransformInfo.cpp:825
llvm::MVT::nxv2i32
@ nxv2i32
Definition: MachineValueType.h:197
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:262
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2173
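A worked example of the signed/unsigned min/max family (values are illustrative):
  APInt A(32, -5, /*isSigned=*/true);
  APInt B(32, 3);
  const APInt &S = APIntOps::smin(A, B); // -5: signed view
  const APInt &U = APIntOps::umax(A, B); // 0xFFFFFFFB: -5 is large when viewed unsigned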
llvm::RecurKind::UMax
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: APInt.h:32
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1512
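A minimal sketch of the range form:
  SmallVector<int, 4> Vals = {1, 2, 3};
  bool HasNeg = llvm::any_of(Vals, [](int V) { return V < 0; }); // false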
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:212
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:94
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:143
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
DL
DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::RecurKind::FMax
@ FMax
FP max implemented in terms of select(cmp()).
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:96
llvm::AArch64TTIImpl::getPopcntSupport
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
Definition: AArch64TargetTransformInfo.cpp:207
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:281
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition: TargetTransformInfo.h:851
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::MVT::v8i64
@ v8i64
Definition: MachineValueType.h:111
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
llvm::ISD::XOR
@ XOR
Definition: ISDOpcodes.h:619
llvm::TargetTransformInfo::MemCmpExpansionOptions::LoadSizes
SmallVector< unsigned, 8 > LoadSizes
Definition: TargetTransformInfo.h:758
AArch64TargetTransformInfo.h
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:99
llvm::AArch64TTIImpl::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: AArch64TargetTransformInfo.cpp:843
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:163
llvm::AArch64TTIImpl::getShuffleCost
int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, int Index, VectorType *SubTp)
Definition: AArch64TargetTransformInfo.cpp:1340
llvm::MVT::nxv16i1
@ nxv16i1
Definition: MachineValueType.h:177
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2188
llvm::MVT::nxv2i16
@ nxv2i16
Definition: MachineValueType.h:190
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:205
llvm::AArch64TTIImpl::getCFInstrCost
unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: AArch64TargetTransformInfo.cpp:658
EnableFalkorHWPFUnrollFix
static cl::opt< bool > EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix", cl::init(true), cl::Hidden)
CostTable.h
llvm::AArch64_IMM::expandMOVImm
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to synthesize the immediate.
Definition: AArch64ExpandImm.cpp:305
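A sketch of querying the expansion; the constant is illustrative, the ImmInsnModel record type comes from AArch64ExpandImm.h, and only the instruction count is inspected:
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(0x0000FFFF0000FFFFULL, 64, Insn);
  unsigned NumMovs = Insn.size(); // number of move-immediate instructions required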
llvm::AArch64TTIImpl::getIntImmCost
int getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
Definition: AArch64TargetTransformInfo.cpp:47
llvm::APInt::sextOrTrunc
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:956
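A worked example of widening and narrowing (values are illustrative):
  APInt A(8, 0x80);             // -128 as a signed 8-bit value
  APInt W = A.sextOrTrunc(32);  // 0xFFFFFF80: sign-extended, value preserved
  APInt N = W.sextOrTrunc(8);   // 0x80: truncated back to 8 bits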
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:87
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
llvm::AArch64TTIImpl::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
Definition: AArch64TargetTransformInfo.cpp:1151
llvm::ISD::SDIV
@ SDIV
Definition: ISDOpcodes.h:235
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:184
llvm::MVT::nxv8i8
@ nxv8i8
Definition: MachineValueType.h:184
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:98
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:403
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:725
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:232
llvm::MVT::nxv8i32
@ nxv8i32
Definition: MachineValueType.h:199
llvm::APInt::sext
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:906
llvm::AArch64TTIImpl::getIntImmCostInst
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
Definition: AArch64TargetTransformInfo.cpp:87
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::MVT::nxv8f16
@ nxv8f16
Definition: MachineValueType.h:216
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:54
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Definition: IVDescriptors.h:66
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:818
llvm::TargetTransformInfo::MemCmpExpansionOptions::NumLoadsPerBlock
unsigned NumLoadsPerBlock
Definition: TargetTransformInfo.h:768
llvm::RecurKind::FAdd
@ FAdd
Sum of floats.
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:146
llvm::MVT::nxv8i1
@ nxv8i1
Definition: MachineValueType.h:176
llvm::Type::isBFloatTy
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:145
llvm::ISD::MUL
@ MUL
Definition: ISDOpcodes.h:234
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
TargetTransformInfo.h
llvm::AArch64TTIImpl::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: AArch64TargetTransformInfo.cpp:995
llvm::AArch64TTIImpl::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: AArch64TargetTransformInfo.cpp:921
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::ISD::MULHU
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type iN containing the high bits of the result.
Definition: ISDOpcodes.h:606
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:68
llvm::AArch64TTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: AArch64TargetTransformInfo.cpp:1110
llvm::RecurKind::FMin
@ FMin
FP min implemented in terms of select(cmp()).
llvm::AArch64TTIImpl::shouldConsiderAddressTypePromotion
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
Definition: AArch64TargetTransformInfo.cpp:1196
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::AArch64TTIImpl::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)
Definition: AArch64TargetTransformInfo.cpp:1306
BasicTTIImpl.h
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:711
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:76
llvm::MVT::nxv2i8
@ nxv2i8
Definition: MachineValueType.h:182
llvm::MVT::v8f32
@ v8f32
Definition: MachineValueType.h:147
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:84
llvm::MVT::v16i64
@ v16i64
Definition: MachineValueType.h:112
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1272
llvm::AArch64TTIImpl::getArithmeticInstrCost
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: AArch64TargetTransformInfo.cpp:693
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
Definition: ISDOpcodes.h:799
llvm::RecurKind::SMax
@ SMax
Signed integer max implemented in terms of select(cmp()).
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::AArch64Subtarget::Falkor
@ Falkor
Definition: AArch64Subtarget.h:65
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:212
llvm::Type::isFP128Ty
bool isFP128Ty() const
Return true if this is 'fp128'.
Definition: Type.h:157
Debug.h
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:634
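A minimal sketch, assuming an LLVMContext Ctx is in scope; contrast the fixed and scalable element counts:
  auto *Fixed = VectorType::get(Type::getFloatTy(Ctx), ElementCount::getFixed(4));    // <4 x float>
  auto *Scal  = VectorType::get(Type::getFloatTy(Ctx), ElementCount::getScalable(4)); // <vscale x 4 x float>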
llvm::AArch64TTIImpl::useNeonVector
bool useNeonVector(const Type *Ty) const
Definition: AArch64TargetTransformInfo.cpp:941
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2178
llvm::MVT::nxv8f64
@ nxv8f64
Definition: MachineValueType.h:234
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:280
llvm::RecurKind::Xor
@ Xor
Bitwise or logical XOR of integers.
llvm::AArch64TTIImpl::getOrCreateResultFromMemIntrinsic
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
Definition: AArch64TargetTransformInfo.cpp:1115
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:129
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:281
llvm::MVT::v4i1
@ v4i1
Definition: MachineValueType.h:63
llvm::AArch64TTIImpl::enableMemCmpExpansion
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: AArch64TargetTransformInfo.cpp:904
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::MVT::nxv8f32
@ nxv8f32
Definition: MachineValueType.h:228