LLVM  16.0.0git
RISCVTargetTransformInfo.cpp
Go to the documentation of this file.
1 //===-- RISCVTargetTransformInfo.cpp - RISC-V specific TTI ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
13 #include "llvm/CodeGen/CostTable.h"
15 #include <cmath>
16 using namespace llvm;
17 
18 #define DEBUG_TYPE "riscvtti"
19 
21  "riscv-v-register-bit-width-lmul",
22  cl::desc(
23  "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
24  "by autovectorized code. Fractional LMULs are not supported."),
25  cl::init(1), cl::Hidden);
26 
28  "riscv-v-slp-max-vf",
29  cl::desc(
30  "Result used for getMaximumVF query which is used exclusively by "
31  "SLP vectorizer. Defaults to 1 which disables SLP."),
32  cl::init(1), cl::Hidden);
33 
// Returns a cost scaling factor proportional to the LMUL the given MVT will
// occupy: LMul for non-fractional scalable types, size/VLEN for fixed types,
// clamped to a minimum of 1.
// NOTE(review): doxygen scrape — source lines 38 (the early return for the
// non-vector case) and 44 (the call producing the {LMul, Fractional} pair,
// presumably RISCVVType::decodeVLMUL) were elided by the extractor; restore
// from upstream before compiling.
34 InstructionCost RISCVTTIImpl::getLMULCost(MVT VT) {
35  // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
36  // implementation-defined.
37  if (!VT.isVector())
39  unsigned Cost;
40  if (VT.isScalableVector()) {
41  unsigned LMul;
42  bool Fractional;
43  std::tie(LMul, Fractional) =
45  if (Fractional)
46  Cost = 1;
47  else
48  Cost = LMul;
49  } else {
50  Cost = VT.getSizeInBits() / ST->getRealMinVLen();
51  }
// Never return 0: even a fractional LMUL costs at least one operation.
52  return std::max<unsigned>(Cost, 1);
53 }
54 
// Cost of materialising an arbitrary integer immediate: zero is free (x0
// register); everything else is priced by instruction count.
// NOTE(review): doxygen scrape — the function signature (source lines 55-56,
// getIntImmCost(const APInt &, Type *, TTI::TargetCostKind)) and line 66 (the
// RISCVMatInt cost query that line 67 is the tail of) were elided; restore
// from upstream before compiling.
57  assert(Ty->isIntegerTy() &&
58  "getIntImmCost can only estimate cost of materialising integers");
59 
60  // We have a Zero register, so 0 is always free.
61  if (Imm == 0)
62  return TTI::TCC_Free;
63 
64  // Otherwise, we check how many instructions it will take to materialise.
65  const DataLayout &DL = getDataLayout();
67  getST()->getFeatureBits());
68 }
69 
70 // Look for patterns of shift followed by AND that can be turned into a pair of
71 // shifts. We won't need to materialize an immediate for the AND so these can
72 // be considered free.
73 static bool canUseShiftPair(Instruction *Inst, const APInt &Imm) {
74  uint64_t Mask = Imm.getZExtValue();
75  auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
76  if (!BO || !BO->hasOneUse())
77  return false;
78 
79  if (BO->getOpcode() != Instruction::Shl)
80  return false;
81 
82  if (!isa<ConstantInt>(BO->getOperand(1)))
83  return false;
84 
85  unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
86  // (and (shl x, c2), c1) will be matched to (srli (slli x, c2+c3), c3) if c1
87  // is a mask shifted by c2 bits with c3 leading zeros.
88  if (isShiftedMask_64(Mask)) {
89  unsigned Trailing = countTrailingZeros(Mask);
90  if (ShAmt == Trailing)
91  return true;
92  }
93 
94  return false;
95 }
96 
// Per-instruction immediate cost: returns TCC_Free for immediates that the
// RISC-V backend can fold into the instruction (12-bit simm, Zbb/Zba zext
// idioms, shift-pair AND masks), otherwise the full materialisation cost.
// Returning TCC_Free also deliberately blocks ConstantHoisting.
// NOTE(review): doxygen scrape — source line 99 (the TTI::TargetCostKind
// CostKind parameter, referenced at line 164 below) was elided by the
// extractor; restore from upstream before compiling.
97 InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
98  const APInt &Imm, Type *Ty,
100  Instruction *Inst) {
101  assert(Ty->isIntegerTy() &&
102  "getIntImmCost can only estimate cost of materialising integers");
103 
104  // We have a Zero register, so 0 is always free.
105  if (Imm == 0)
106  return TTI::TCC_Free;
107 
108  // Some instructions in RISC-V can take a 12-bit immediate. Some of these are
109  // commutative, in others the immediate comes from a specific argument index.
110  bool Takes12BitImm = false;
111  unsigned ImmArgIdx = ~0U;
112 
113  switch (Opcode) {
114  case Instruction::GetElementPtr:
115  // Never hoist any arguments to a GetElementPtr. CodeGenPrepare will
116  // split up large offsets in GEP into better parts than ConstantHoisting
117  // can.
118  return TTI::TCC_Free;
119  case Instruction::And:
120  // zext.h
121  if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
122  return TTI::TCC_Free;
123  // zext.w
124  if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
125  return TTI::TCC_Free;
// Only when the immediate is operand 1 and fits XLEN can the AND be folded
// into a slli+srli pair (see canUseShiftPair above).
126  if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
127  canUseShiftPair(Inst, Imm))
128  return TTI::TCC_Free;
129  [[fallthrough]];
130  case Instruction::Add:
131  case Instruction::Or:
132  case Instruction::Xor:
133  Takes12BitImm = true;
134  break;
135  case Instruction::Mul:
136  // Negated power of 2 is a shift and a negate.
137  if (Imm.isNegatedPowerOf2())
138  return TTI::TCC_Free;
139  // FIXME: There is no MULI instruction.
140  Takes12BitImm = true;
141  break;
142  case Instruction::Sub:
143  case Instruction::Shl:
144  case Instruction::LShr:
145  case Instruction::AShr:
146  Takes12BitImm = true;
// For these non-commutative ops only the RHS can be an immediate.
147  ImmArgIdx = 1;
148  break;
149  default:
150  break;
151  }
152 
153  if (Takes12BitImm) {
154  // Check immediate is the correct argument...
155  if (Instruction::isCommutative(Opcode) || Idx == ImmArgIdx) {
156  // ... and fits into the 12-bit immediate.
157  if (Imm.getMinSignedBits() <= 64 &&
158  getTLI()->isLegalAddImmediate(Imm.getSExtValue())) {
159  return TTI::TCC_Free;
160  }
161  }
162 
163  // Otherwise, use the full materialisation cost.
164  return getIntImmCost(Imm, Ty, CostKind);
165  }
166 
167  // By default, prevent hoisting.
168  return TTI::TCC_Free;
169 }
170 
// Intrinsic-operand immediate cost: always TCC_Free, which prevents
// ConstantHoisting from pulling immediates out of intrinsic calls.
// NOTE(review): doxygen scrape — the start of the signature (source lines
// 171-172, presumably getIntImmCostIntrin(Intrinsic::ID, unsigned Idx, ...)
// and line 174 (the CostKind parameter) were elided; restore from upstream.
173  const APInt &Imm, Type *Ty,
175  // Prevent hoisting in unknown cases.
176  return TTI::TCC_Free;
177 }
178 
// NOTE(review): doxygen scrape — the signature (source line 180, presumably
// getPopcntSupport(unsigned TyWidth)) and the return statement (line 182,
// presumably keyed on ST->hasStdExtZbb()) were elided; only the assert body
// survives. Restore from upstream before compiling.
181  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
183 }
184 
// NOTE(review): doxygen scrape — the signature (source line 185, presumably
// bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const)
// was elided; restore from upstream before compiling.
// Returns true for reduction intrinsics that must be expanded by the
// ExpandReductions pass because RVV has no equivalent instruction.
186  // Currently, the ExpandReductions pass can't expand scalable-vector
187  // reductions, but we still request expansion as RVV doesn't support certain
188  // reductions and the SelectionDAG can't legalize them either.
189  switch (II->getIntrinsicID()) {
190  default:
191  return false;
192  // These reductions have no equivalent in RVV
193  case Intrinsic::vector_reduce_mul:
194  case Intrinsic::vector_reduce_fmul:
195  return true;
196  }
197 }
198 
// Maximum vscale: with V-extension vectors this is the largest real VLEN
// divided by the 64-bit RVV block size; otherwise defer to the base impl.
// NOTE(review): doxygen scrape — the signature (source line 199, presumably
// std::optional<unsigned> RISCVTTIImpl::getMaxVScale() const) was elided.
200  if (ST->hasVInstructions())
201  return ST->getRealMaxVLen() / RISCV::RVVBitsPerBlock;
202  return BaseT::getMaxVScale();
203 }
204 
// vscale value to tune for: min VLEN / RVV block size when vectors are
// available and VLEN is at least one block; otherwise the base default.
// NOTE(review): doxygen scrape — the signature (source line 205, presumably
// std::optional<unsigned> RISCVTTIImpl::getVScaleForTuning() const) was
// elided; restore from upstream before compiling.
206  if (ST->hasVInstructions())
207  if (unsigned MinVLen = ST->getRealMinVLen();
208  MinVLen >= RISCV::RVVBitsPerBlock)
209  return MinVLen / RISCV::RVVBitsPerBlock;
210  return BaseT::getVScaleForTuning();
212 
// Register width for each register class, honouring the
// riscv-v-register-bit-width-lmul option (clamped to a power of two in
// [1, 8]) for the vector classes.
// NOTE(review): doxygen scrape — the switch's case labels and part of the
// scalable-vector expression (source lines 218, 220, 223, 226-227 —
// presumably RGK_Scalar / RGK_FixedWidthVector / RGK_ScalableVector and the
// ELEN/RVVBitsPerBlock term) were elided; restore from upstream.
213 TypeSize
215  unsigned LMUL = PowerOf2Floor(
216  std::max<unsigned>(std::min<unsigned>(RVVRegisterWidthLMUL, 8), 1));
217  switch (K) {
219  return TypeSize::getFixed(ST->getXLen());
221  return TypeSize::getFixed(
222  ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
224  return TypeSize::getScalable(
225  (ST->hasVInstructions() &&
228  : 0);
229  }
230 
231  llvm_unreachable("Unsupported register kind");
232 }
233 
// Cost of a vector.splice shuffle: modelled as a vslidedown + vslideup pair,
// scaled by legalization count and LMUL.
// NOTE(review): doxygen scrape — the signature (source line 234, presumably
// InstructionCost RISCVTTIImpl::getSpliceCost(VectorType *Tp, int Index))
// was elided; restore from upstream before compiling.
235  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
236 
237  unsigned Cost = 2; // vslidedown+vslideup.
238  // TODO: Multiplying by LT.first implies this legalizes into multiple copies
239  // of similar code, but I think we expand through memory.
240  return Cost * LT.first * getLMULCost(LT.second);
241 }
242 
// Shuffle cost model: special-cases scalable broadcast/splice/reverse and
// fixed-vector broadcasts (with and without a scalar feed); everything else
// falls through to the base implementation.
// NOTE(review): doxygen scrape — the start of the signature (source lines
// 243-245, presumably getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
// ArrayRef<int> Mask, TTI::TargetCostKind CostKind, ...)) and line 247
// (presumably the ArrayRef<const Value *> Args parameter used below) were
// elided; restore from upstream before compiling.
246  int Index, VectorType *SubTp,
248  if (isa<ScalableVectorType>(Tp)) {
249  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
250  switch (Kind) {
251  default:
252  // Fallthrough to generic handling.
253  // TODO: Most of these cases will return getInvalid in generic code, and
254  // must be implemented here.
255  break;
256  case TTI::SK_Broadcast: {
257  return LT.first * 1;
258  }
259  case TTI::SK_Splice:
260  return getSpliceCost(Tp, Index);
261  case TTI::SK_Reverse:
262  // Most of the cost here is producing the vrgather index register
263  // Example sequence:
264  // csrr a0, vlenb
265  // srli a0, a0, 3
266  // addi a0, a0, -1
267  // vsetvli a1, zero, e8, mf8, ta, mu (ignored)
268  // vid.v v9
269  // vrsub.vx v10, v9, a0
270  // vrgather.vv v9, v8, v10
271  if (Tp->getElementType()->isIntegerTy(1))
272  // Mask operation additionally required extend and truncate
273  return LT.first * 9;
274  return LT.first * 6;
275  }
276  }
277 
278  if (isa<FixedVectorType>(Tp) && Kind == TargetTransformInfo::SK_Broadcast) {
279  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
// HasScalar: the broadcast source is an insertelement, so the scalar value
// is available in a GPR/FPR and vmv.v.x/vfmv.v.f can be used directly.
280  bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
281  Instruction::InsertElement);
282  if (LT.second.getScalarSizeInBits() == 1) {
283  if (HasScalar) {
284  // Example sequence:
285  // andi a0, a0, 1
286  // vsetivli zero, 2, e8, mf8, ta, ma (ignored)
287  // vmv.v.x v8, a0
288  // vmsne.vi v0, v8, 0
289  return LT.first * getLMULCost(LT.second) * 3;
290  }
291  // Example sequence:
292  // vsetivli zero, 2, e8, mf8, ta, mu (ignored)
293  // vmv.v.i v8, 0
294  // vmerge.vim v8, v8, 1, v0
295  // vmv.x.s a0, v8
296  // andi a0, a0, 1
297  // vmv.v.x v8, a0
298  // vmsne.vi v0, v8, 0
299 
300  return LT.first * getLMULCost(LT.second) * 6;
301  }
302 
303  if (HasScalar) {
304  // Example sequence:
305  // vmv.v.x v8, a0
306  return LT.first * getLMULCost(LT.second);
307  }
308 
309  // Example sequence:
310  // vrgather.vi v9, v8, 0
311  // TODO: vrgather could be slower than vmv.v.x. It is
312  // implementation-dependent.
313  return LT.first * getLMULCost(LT.second);
314  }
315 
316  return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
317 }
318 
// Masked load/store cost: if the masked op is legal for this type/alignment
// it costs the same as an unmasked memory op; otherwise scalarize via base.
// NOTE(review): doxygen scrape — source lines 319 (return-type line), 322
// (the CostKind parameter) and 324 (the second half of the legality
// condition at line 323) were elided; restore from upstream.
320 RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
321  unsigned AddressSpace,
323  if (!isLegalMaskedLoadStore(Src, Alignment) ||
325  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
326  CostKind);
327 
328  return getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
329 }
330 
// Gather/scatter cost: legal ops are priced as (estimated VL) x (cost of one
// scalar-element memory op); illegal ops fall back to the base model.
// NOTE(review): doxygen scrape — source line 331 (the function name line of
// the signature) and line 334 (the guard that line 335 returns under,
// presumably a CostKind check) were elided; restore from upstream.
332  unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
333  Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
335  return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
336  Alignment, CostKind, I);
337 
338  if ((Opcode == Instruction::Load &&
339  !isLegalMaskedGather(DataTy, Align(Alignment))) ||
340  (Opcode == Instruction::Store &&
341  !isLegalMaskedScatter(DataTy, Align(Alignment))))
342  return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
343  Alignment, CostKind, I);
344 
345  // Cost is proportional to the number of memory operations implied. For
346  // scalable vectors, we use an estimate on that number since we don't
347  // know exactly what VL will be.
348  auto &VTy = *cast<VectorType>(DataTy);
349  InstructionCost MemOpCost =
350  getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
351  {TTI::OK_AnyValue, TTI::OP_None}, I);
352  unsigned NumLoads = getEstimatedVLFor(&VTy);
353  return NumLoads * MemOpCost;
354 }
355 
356 // Currently, these represent both throughput and codesize costs
357 // for the respective intrinsics. The costs in this table are simply
358 // instruction counts with the following adjustments made:
359 // * One vsetvli is considered free.
429  {Intrinsic::roundeven, MVT::v2f32, 9},
430  {Intrinsic::roundeven, MVT::v4f32, 9},
431  {Intrinsic::roundeven, MVT::v8f32, 9},
432  {Intrinsic::roundeven, MVT::v16f32, 9},
433  {Intrinsic::roundeven, MVT::nxv1f32, 9},
434  {Intrinsic::roundeven, MVT::nxv2f32, 9},
435  {Intrinsic::roundeven, MVT::nxv4f32, 9},
436  {Intrinsic::roundeven, MVT::nxv8f32, 9},
437  {Intrinsic::roundeven, MVT::nxv16f32, 9},
438  {Intrinsic::roundeven, MVT::v2f64, 9},
439  {Intrinsic::roundeven, MVT::v4f64, 9},
440  {Intrinsic::roundeven, MVT::v8f64, 9},
441  {Intrinsic::roundeven, MVT::v16f64, 9},
442  {Intrinsic::roundeven, MVT::nxv1f64, 9},
443  {Intrinsic::roundeven, MVT::nxv2f64, 9},
444  {Intrinsic::roundeven, MVT::nxv4f64, 9},
445  {Intrinsic::roundeven, MVT::nxv8f64, 9},
446  {Intrinsic::fabs, MVT::v2f32, 1},
447  {Intrinsic::fabs, MVT::v4f32, 1},
448  {Intrinsic::fabs, MVT::v8f32, 1},
449  {Intrinsic::fabs, MVT::v16f32, 1},
450  {Intrinsic::fabs, MVT::nxv1f32, 1},
451  {Intrinsic::fabs, MVT::nxv2f32, 1},
452  {Intrinsic::fabs, MVT::nxv4f32, 1},
453  {Intrinsic::fabs, MVT::nxv8f32, 1},
454  {Intrinsic::fabs, MVT::nxv16f32, 1},
455  {Intrinsic::fabs, MVT::v2f64, 1},
456  {Intrinsic::fabs, MVT::v4f64, 1},
457  {Intrinsic::fabs, MVT::v8f64, 1},
458  {Intrinsic::fabs, MVT::v16f64, 1},
459  {Intrinsic::fabs, MVT::nxv1f64, 1},
460  {Intrinsic::fabs, MVT::nxv2f64, 1},
461  {Intrinsic::fabs, MVT::nxv4f64, 1},
462  {Intrinsic::fabs, MVT::nxv8f64, 1},
463  {Intrinsic::sqrt, MVT::v2f32, 1},
464  {Intrinsic::sqrt, MVT::v4f32, 1},
465  {Intrinsic::sqrt, MVT::v8f32, 1},
466  {Intrinsic::sqrt, MVT::v16f32, 1},
467  {Intrinsic::sqrt, MVT::nxv1f32, 1},
468  {Intrinsic::sqrt, MVT::nxv2f32, 1},
469  {Intrinsic::sqrt, MVT::nxv4f32, 1},
470  {Intrinsic::sqrt, MVT::nxv8f32, 1},
471  {Intrinsic::sqrt, MVT::nxv16f32, 1},
472  {Intrinsic::sqrt, MVT::v2f64, 1},
473  {Intrinsic::sqrt, MVT::v4f64, 1},
474  {Intrinsic::sqrt, MVT::v8f64, 1},
475  {Intrinsic::sqrt, MVT::v16f64, 1},
476  {Intrinsic::sqrt, MVT::nxv1f64, 1},
477  {Intrinsic::sqrt, MVT::nxv2f64, 1},
478  {Intrinsic::sqrt, MVT::nxv4f64, 1},
479  {Intrinsic::sqrt, MVT::nxv8f64, 1},
480  {Intrinsic::bswap, MVT::v2i16, 3},
481  {Intrinsic::bswap, MVT::v4i16, 3},
482  {Intrinsic::bswap, MVT::v8i16, 3},
483  {Intrinsic::bswap, MVT::v16i16, 3},
484  {Intrinsic::bswap, MVT::nxv1i16, 3},
485  {Intrinsic::bswap, MVT::nxv2i16, 3},
486  {Intrinsic::bswap, MVT::nxv4i16, 3},
487  {Intrinsic::bswap, MVT::nxv8i16, 3},
488  {Intrinsic::bswap, MVT::nxv16i16, 3},
489  {Intrinsic::bswap, MVT::v2i32, 12},
490  {Intrinsic::bswap, MVT::v4i32, 12},
491  {Intrinsic::bswap, MVT::v8i32, 12},
492  {Intrinsic::bswap, MVT::v16i32, 12},
493  {Intrinsic::bswap, MVT::nxv1i32, 12},
494  {Intrinsic::bswap, MVT::nxv2i32, 12},
495  {Intrinsic::bswap, MVT::nxv4i32, 12},
496  {Intrinsic::bswap, MVT::nxv8i32, 12},
497  {Intrinsic::bswap, MVT::nxv16i32, 12},
498  {Intrinsic::bswap, MVT::v2i64, 31},
499  {Intrinsic::bswap, MVT::v4i64, 31},
500  {Intrinsic::bswap, MVT::v8i64, 31},
501  {Intrinsic::bswap, MVT::v16i64, 31},
502  {Intrinsic::bswap, MVT::nxv1i64, 31},
503  {Intrinsic::bswap, MVT::nxv2i64, 31},
504  {Intrinsic::bswap, MVT::nxv4i64, 31},
505  {Intrinsic::bswap, MVT::nxv8i64, 31},
506  {Intrinsic::vp_bswap, MVT::v2i16, 3},
507  {Intrinsic::vp_bswap, MVT::v4i16, 3},
508  {Intrinsic::vp_bswap, MVT::v8i16, 3},
509  {Intrinsic::vp_bswap, MVT::v16i16, 3},
510  {Intrinsic::vp_bswap, MVT::nxv1i16, 3},
511  {Intrinsic::vp_bswap, MVT::nxv2i16, 3},
512  {Intrinsic::vp_bswap, MVT::nxv4i16, 3},
513  {Intrinsic::vp_bswap, MVT::nxv8i16, 3},
514  {Intrinsic::vp_bswap, MVT::nxv16i16, 3},
515  {Intrinsic::vp_bswap, MVT::v2i32, 12},
516  {Intrinsic::vp_bswap, MVT::v4i32, 12},
517  {Intrinsic::vp_bswap, MVT::v8i32, 12},
518  {Intrinsic::vp_bswap, MVT::v16i32, 12},
519  {Intrinsic::vp_bswap, MVT::nxv1i32, 12},
520  {Intrinsic::vp_bswap, MVT::nxv2i32, 12},
521  {Intrinsic::vp_bswap, MVT::nxv4i32, 12},
522  {Intrinsic::vp_bswap, MVT::nxv8i32, 12},
523  {Intrinsic::vp_bswap, MVT::nxv16i32, 12},
524  {Intrinsic::vp_bswap, MVT::v2i64, 31},
525  {Intrinsic::vp_bswap, MVT::v4i64, 31},
526  {Intrinsic::vp_bswap, MVT::v8i64, 31},
527  {Intrinsic::vp_bswap, MVT::v16i64, 31},
528  {Intrinsic::vp_bswap, MVT::nxv1i64, 31},
529  {Intrinsic::vp_bswap, MVT::nxv2i64, 31},
530  {Intrinsic::vp_bswap, MVT::nxv4i64, 31},
531  {Intrinsic::vp_bswap, MVT::nxv8i64, 31},
532  {Intrinsic::bitreverse, MVT::v2i8, 17},
533  {Intrinsic::bitreverse, MVT::v4i8, 17},
534  {Intrinsic::bitreverse, MVT::v8i8, 17},
535  {Intrinsic::bitreverse, MVT::v16i8, 17},
536  {Intrinsic::bitreverse, MVT::nxv1i8, 17},
537  {Intrinsic::bitreverse, MVT::nxv2i8, 17},
538  {Intrinsic::bitreverse, MVT::nxv4i8, 17},
539  {Intrinsic::bitreverse, MVT::nxv8i8, 17},
540  {Intrinsic::bitreverse, MVT::nxv16i8, 17},
541  {Intrinsic::bitreverse, MVT::v2i16, 24},
542  {Intrinsic::bitreverse, MVT::v4i16, 24},
543  {Intrinsic::bitreverse, MVT::v8i16, 24},
544  {Intrinsic::bitreverse, MVT::v16i16, 24},
545  {Intrinsic::bitreverse, MVT::nxv1i16, 24},
546  {Intrinsic::bitreverse, MVT::nxv2i16, 24},
547  {Intrinsic::bitreverse, MVT::nxv4i16, 24},
548  {Intrinsic::bitreverse, MVT::nxv8i16, 24},
549  {Intrinsic::bitreverse, MVT::nxv16i16, 24},
550  {Intrinsic::bitreverse, MVT::v2i32, 33},
551  {Intrinsic::bitreverse, MVT::v4i32, 33},
552  {Intrinsic::bitreverse, MVT::v8i32, 33},
553  {Intrinsic::bitreverse, MVT::v16i32, 33},
554  {Intrinsic::bitreverse, MVT::nxv1i32, 33},
555  {Intrinsic::bitreverse, MVT::nxv2i32, 33},
556  {Intrinsic::bitreverse, MVT::nxv4i32, 33},
557  {Intrinsic::bitreverse, MVT::nxv8i32, 33},
558  {Intrinsic::bitreverse, MVT::nxv16i32, 33},
559  {Intrinsic::bitreverse, MVT::v2i64, 52},
560  {Intrinsic::bitreverse, MVT::v4i64, 52},
561  {Intrinsic::bitreverse, MVT::v8i64, 52},
562  {Intrinsic::bitreverse, MVT::v16i64, 52},
563  {Intrinsic::bitreverse, MVT::nxv1i64, 52},
564  {Intrinsic::bitreverse, MVT::nxv2i64, 52},
565  {Intrinsic::bitreverse, MVT::nxv4i64, 52},
566  {Intrinsic::bitreverse, MVT::nxv8i64, 52},
567  {Intrinsic::ctpop, MVT::v2i8, 12},
568  {Intrinsic::ctpop, MVT::v4i8, 12},
569  {Intrinsic::ctpop, MVT::v8i8, 12},
570  {Intrinsic::ctpop, MVT::v16i8, 12},
571  {Intrinsic::ctpop, MVT::nxv1i8, 12},
572  {Intrinsic::ctpop, MVT::nxv2i8, 12},
573  {Intrinsic::ctpop, MVT::nxv4i8, 12},
574  {Intrinsic::ctpop, MVT::nxv8i8, 12},
575  {Intrinsic::ctpop, MVT::nxv16i8, 12},
576  {Intrinsic::ctpop, MVT::v2i16, 19},
577  {Intrinsic::ctpop, MVT::v4i16, 19},
578  {Intrinsic::ctpop, MVT::v8i16, 19},
579  {Intrinsic::ctpop, MVT::v16i16, 19},
580  {Intrinsic::ctpop, MVT::nxv1i16, 19},
581  {Intrinsic::ctpop, MVT::nxv2i16, 19},
582  {Intrinsic::ctpop, MVT::nxv4i16, 19},
583  {Intrinsic::ctpop, MVT::nxv8i16, 19},
584  {Intrinsic::ctpop, MVT::nxv16i16, 19},
585  {Intrinsic::ctpop, MVT::v2i32, 20},
586  {Intrinsic::ctpop, MVT::v4i32, 20},
587  {Intrinsic::ctpop, MVT::v8i32, 20},
588  {Intrinsic::ctpop, MVT::v16i32, 20},
589  {Intrinsic::ctpop, MVT::nxv1i32, 20},
590  {Intrinsic::ctpop, MVT::nxv2i32, 20},
591  {Intrinsic::ctpop, MVT::nxv4i32, 20},
592  {Intrinsic::ctpop, MVT::nxv8i32, 20},
593  {Intrinsic::ctpop, MVT::nxv16i32, 20},
594  {Intrinsic::ctpop, MVT::v2i64, 21},
595  {Intrinsic::ctpop, MVT::v4i64, 21},
596  {Intrinsic::ctpop, MVT::v8i64, 21},
597  {Intrinsic::ctpop, MVT::v16i64, 21},
598  {Intrinsic::ctpop, MVT::nxv1i64, 21},
599  {Intrinsic::ctpop, MVT::nxv2i64, 21},
600  {Intrinsic::ctpop, MVT::nxv4i64, 21},
601  {Intrinsic::ctpop, MVT::nxv8i64, 21},
602 };
603 
// Intrinsic cost model: special-cases rounding, min/max, saturating
// arithmetic, stepvector and vp rounding intrinsics, then consults
// VectorIntrinsicCostTable for vector types.
// NOTE(review): doxygen scrape — the signature (source lines 604-606,
// presumably getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
// TTI::TargetCostKind CostKind)) and line 671 (the final fallback return,
// presumably BaseT::getIntrinsicInstrCost(ICA, CostKind)) were elided;
// restore from upstream before compiling.
607  auto *RetTy = ICA.getReturnType();
608  switch (ICA.getID()) {
609  case Intrinsic::ceil:
610  case Intrinsic::floor:
611  case Intrinsic::trunc:
612  case Intrinsic::rint:
613  case Intrinsic::round:
614  case Intrinsic::roundeven: {
615  // These all use the same code.
616  auto LT = getTypeLegalizationCost(RetTy);
617  if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
618  return LT.first * 8;
619  break;
620  }
621  case Intrinsic::umin:
622  case Intrinsic::umax:
623  case Intrinsic::smin:
624  case Intrinsic::smax: {
625  auto LT = getTypeLegalizationCost(RetTy);
// Single instruction when RVV covers the vector case or Zbb the scalar one.
626  if ((ST->hasVInstructions() && LT.second.isVector()) ||
627  (LT.second.isScalarInteger() && ST->hasStdExtZbb()))
628  return LT.first;
629  break;
630  }
631  case Intrinsic::sadd_sat:
632  case Intrinsic::ssub_sat:
633  case Intrinsic::uadd_sat:
634  case Intrinsic::usub_sat: {
635  auto LT = getTypeLegalizationCost(RetTy);
636  if (ST->hasVInstructions() && LT.second.isVector())
637  return LT.first;
638  break;
639  }
640  // TODO: add more intrinsic
641  case Intrinsic::experimental_stepvector: {
642  unsigned Cost = 1; // vid
643  auto LT = getTypeLegalizationCost(RetTy);
644  return Cost + (LT.first - 1);
645  }
646  case Intrinsic::vp_rint: {
647  // RISC-V target uses at least 5 instructions to lower rounding intrinsics.
648  unsigned Cost = 5;
649  auto LT = getTypeLegalizationCost(RetTy);
650  if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
651  return Cost * LT.first;
652  break;
653  }
654  case Intrinsic::vp_nearbyint: {
655  // More one read and one write for fflags than vp_rint.
656  unsigned Cost = 7;
657  auto LT = getTypeLegalizationCost(RetTy);
658  if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
659  return Cost * LT.first;
660  break;
661  }
662  }
663 
664  if (ST->hasVInstructions() && RetTy->isVectorTy()) {
665  auto LT = getTypeLegalizationCost(RetTy);
666  if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable,
667  ICA.getID(), LT.second))
668  return LT.first * Entry->Cost;
669  }
670 
672 }
673 
// Vector cast cost model: extends/truncates, FP conversions and int<->FP
// conversions are priced by the log2 ratio of element widths, with special
// handling for i1 mask vectors; scalar and illegal cases defer to base.
// NOTE(review): doxygen scrape — the start of the signature (source line 674,
// presumably getCastInstrCost(unsigned Opcode, Type *Dst, ...) and lines
// 676-677 (presumably the TTI::CastContextHint CCH and TargetCostKind
// CostKind parameters used below) were elided; restore from upstream.
675  Type *Src,
678  const Instruction *I) {
679  if (isa<VectorType>(Dst) && isa<VectorType>(Src)) {
680  // FIXME: Need to compute legalizing cost for illegal types.
681  if (!isTypeLegal(Src) || !isTypeLegal(Dst))
682  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
683 
684  // Skip if element size of Dst or Src is bigger than ELEN.
685  if (Src->getScalarSizeInBits() > ST->getELEN() ||
686  Dst->getScalarSizeInBits() > ST->getELEN())
687  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
688 
689  int ISD = TLI->InstructionOpcodeToISD(Opcode);
690  assert(ISD && "Invalid opcode");
691 
692  // FIXME: Need to consider vsetvli and lmul.
// PowDiff: log2 of the dst/src element-width ratio; each widening or
// narrowing step costs one instruction below.
693  int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) -
694  (int)Log2_32(Src->getScalarSizeInBits());
695  switch (ISD) {
696  case ISD::SIGN_EXTEND:
697  case ISD::ZERO_EXTEND:
698  if (Src->getScalarSizeInBits() == 1) {
699  // We do not use vsext/vzext to extend from mask vector.
700  // Instead we use the following instructions to extend from mask vector:
701  // vmv.v.i v8, 0
702  // vmerge.vim v8, v8, -1, v0
703  return 2;
704  }
705  return 1;
706  case ISD::TRUNCATE:
707  if (Dst->getScalarSizeInBits() == 1) {
708  // We do not use several vncvt to truncate to mask vector. So we could
709  // not use PowDiff to calculate it.
710  // Instead we use the following instructions to truncate to mask vector:
711  // vand.vi v8, v8, 1
712  // vmsne.vi v0, v8, 0
713  return 2;
714  }
715  [[fallthrough]];
716  case ISD::FP_EXTEND:
717  case ISD::FP_ROUND:
718  // Counts of narrow/widen instructions.
719  return std::abs(PowDiff);
720  case ISD::FP_TO_SINT:
721  case ISD::FP_TO_UINT:
722  case ISD::SINT_TO_FP:
723  case ISD::UINT_TO_FP:
724  if (Src->getScalarSizeInBits() == 1 || Dst->getScalarSizeInBits() == 1) {
725  // The cost of convert from or to mask vector is different from other
726  // cases. We could not use PowDiff to calculate it.
727  // For mask vector to fp, we should use the following instructions:
728  // vmv.v.i v8, 0
729  // vmerge.vim v8, v8, -1, v0
730  // vfcvt.f.x.v v8, v8
731 
732  // And for fp vector to mask, we use:
733  // vfncvt.rtz.x.f.w v9, v8
734  // vand.vi v8, v9, 1
735  // vmsne.vi v0, v8, 0
736  return 3;
737  }
738  if (std::abs(PowDiff) <= 1)
739  return 1;
740  // Backend could lower (v[sz]ext i8 to double) to vfcvt(v[sz]ext.f8 i8),
741  // so it only need two conversion.
742  if (Src->isIntOrIntVectorTy())
743  return 2;
744  // Counts of narrow/widen instructions.
745  return std::abs(PowDiff);
746  }
747  }
748  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
749 }
750 
// Estimated VL for a vector type: computeVLMAX for scalable vectors, the
// exact element count for fixed vectors.
// NOTE(review): doxygen scrape — source line 755 (the definition of
// VectorBits used at line 756, presumably ST->getRealMaxVLen() or similar)
// was elided; restore from upstream before compiling.
751 unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) {
752  if (isa<ScalableVectorType>(Ty)) {
753  const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
754  const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
756  return RISCVTargetLowering::computeVLMAX(VectorBits, EltSize, MinSize);
757  }
758  return cast<FixedVectorType>(Ty)->getNumElements();
759 }
760 
// Min/max reduction cost: i1 vectors use a vcpop-based sequence; otherwise
// two vmv plus a log2(VL)-deep rvv reduction tree.
// NOTE(review): doxygen scrape — the start of the signature (source lines
// 761-762, presumably getMinMaxReductionCost(VectorType *Ty, VectorType
// *CondTy, ...) and line 764 (the CostKind parameter) were elided; restore
// from upstream before compiling.
763  bool IsUnsigned,
765  if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
766  return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
767 
768  // Skip if scalar size of Ty is bigger than ELEN.
769  if (Ty->getScalarSizeInBits() > ST->getELEN())
770  return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
771 
772  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
773  if (Ty->getElementType()->isIntegerTy(1))
774  // vcpop sequences, see vreduction-mask.ll. umax, smin actually only
775  // cost 2, but we don't have enough info here so we slightly over cost.
776  return (LT.first - 1) + 3;
777 
778  // IR Reduction is composed by two vmv and one rvv reduction instruction.
779  InstructionCost BaseCost = 2;
780  unsigned VL = getEstimatedVLFor(Ty);
781  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
782 }
783 
// Arithmetic reduction cost for add/or/xor/and/fadd; other opcodes and
// unsupported types fall back to the base implementation.
// NOTE(review): doxygen scrape — the signature (source lines 784-787,
// presumably getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
// std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind)) and line
// 810 (the condition guarding the linear-VL return at line 811, presumably
// TTI::requiresOrderedReduction(FMF)) were elided; restore from upstream.
788  if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
789  return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
790 
791  // Skip if scalar size of Ty is bigger than ELEN.
792  if (Ty->getScalarSizeInBits() > ST->getELEN())
793  return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
794 
795  int ISD = TLI->InstructionOpcodeToISD(Opcode);
796  assert(ISD && "Invalid opcode");
797 
798  if (ISD != ISD::ADD && ISD != ISD::OR && ISD != ISD::XOR && ISD != ISD::AND &&
799  ISD != ISD::FADD)
800  return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
801 
802  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
803  if (Ty->getElementType()->isIntegerTy(1))
804  // vcpop sequences, see vreduction-mask.ll
805  return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);
806 
807  // IR Reduction is composed by two vmv and one rvv reduction instruction.
808  InstructionCost BaseCost = 2;
809  unsigned VL = getEstimatedVLFor(Ty);
// Ordered (strict FP) reductions are linear in VL; unordered use a log tree.
811  return (LT.first - 1) + BaseCost + VL;
812  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
813 }
814 
// Extended (widening) reduction cost: only add/fadd where the result is
// exactly double the legalized element width; everything else goes to base.
// NOTE(review): doxygen scrape — source line 815 (the function name line of
// the signature) and line 817 (presumably the FMF and CostKind parameters
// used below) were elided; restore from upstream before compiling.
816  unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
818  if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
819  return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
820  FMF, CostKind);
821 
822  // Skip if scalar size of ResTy is bigger than ELEN.
823  if (ResTy->getScalarSizeInBits() > ST->getELEN())
824  return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
825  FMF, CostKind);
826 
827  if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
828  return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
829  FMF, CostKind);
830 
831  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
832 
// Widening reduction instructions only double the element width, so any
// other ratio cannot be matched and defers to the base model.
833  if (ResTy->getScalarSizeInBits() != 2 * LT.second.getScalarSizeInBits())
834  return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
835  FMF, CostKind);
836 
837  return (LT.first - 1) +
838  getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
839 }
840 
// Extra cost of materialising a constant operand for a store: free for
// scalars, one vmv/vfmv for uniform vectors, else a constant-pool load.
// NOTE(review): doxygen scrape — source line 841 (signature start, presumably
// getStoreImmCost(Type *Ty, ...), line 843 (the CostKind parameter) and line
// 860 (the start of the final return, presumably a getMemoryOpCost call that
// line 861 continues) were elided; restore from upstream before compiling.
842  TTI::OperandValueInfo OpInfo,
844  assert(OpInfo.isConstant() && "non constant operand?");
845  if (!isa<VectorType>(Ty))
846  // FIXME: We need to account for immediate materialization here, but doing
847  // a decent job requires more knowledge about the immediate than we
848  // currently have here.
849  return 0;
850 
851  if (OpInfo.isUniform())
852  // vmv.x.i, vmv.v.x, or vfmv.v.f
853  // We ignore the cost of the scalar constant materialization to be consistent
854  // with how we treat scalar constants themselves just above.
855  return 1;
856 
857  // Add a cost of address generation + the cost of the vector load. The
858  // address is expected to be a PC relative offset to a constant pool entry
859  // using auipc/addi.
861  /*AddressSpace=*/0, CostKind);
862 }
863 
864 
// Memory op cost: base cost plus, for stores of constants, the immediate
// materialisation cost from getStoreImmCost.
// NOTE(review): doxygen scrape — source line 865 (signature start, presumably
// getMemoryOpCost(unsigned Opcode, Type *Src, ...) and line 868 (the
// CostKind parameter) were elided; restore from upstream before compiling.
866  MaybeAlign Alignment,
867  unsigned AddressSpace,
869  TTI::OperandValueInfo OpInfo,
870  const Instruction *I) {
871  InstructionCost Cost = 0;
872  if (Opcode == Instruction::Store && OpInfo.isConstant())
873  Cost += getStoreImmCost(Src, OpInfo, CostKind);
874  return Cost + BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
875  CostKind, OpInfo, I);
876 }
877 
// Compare/select cost model for RVV: selects are priced by the vmerge /
// mask-logic sequences below; icmp is native; fcmp is native only for the
// six ordered/UNE predicates and supported element widths.
// NOTE(review): doxygen scrape — source line 878 (signature start, presumably
// getCmpSelInstrCost(unsigned Opcode, Type *ValTy, ...), line 881 (the
// CostKind parameter) and line 883 (the guard that line 884 returns under,
// presumably a CostKind check) were elided; restore from upstream.
879  Type *CondTy,
880  CmpInst::Predicate VecPred,
882  const Instruction *I) {
884  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
885  I);
886 
887  if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
888  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
889  I);
890 
891  // Skip if scalar size of ValTy is bigger than ELEN.
892  if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELEN())
893  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
894  I);
895 
896  if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
897  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
898  if (CondTy->isVectorTy()) {
899  if (ValTy->getScalarSizeInBits() == 1) {
900  // vmandn.mm v8, v8, v9
901  // vmand.mm v9, v0, v9
902  // vmor.mm v0, v9, v8
903  return LT.first * 3;
904  }
905  // vselect and max/min are supported natively.
906  return LT.first * 1;
907  }
908 
// Scalar condition: must first be splatted and compared into a mask.
909  if (ValTy->getScalarSizeInBits() == 1) {
910  // vmv.v.x v9, a0
911  // vmsne.vi v9, v9, 0
912  // vmandn.mm v8, v8, v9
913  // vmand.mm v9, v0, v9
914  // vmor.mm v0, v9, v8
915  return LT.first * 5;
916  }
917 
918  // vmv.v.x v10, a0
919  // vmsne.vi v0, v10, 0
920  // vmerge.vvm v8, v9, v8, v0
921  return LT.first * 3;
922  }
923 
924  if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
925  ValTy->isVectorTy()) {
926  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
927 
928  // Support natively.
929  if (CmpInst::isIntPredicate(VecPred))
930  return LT.first * 1;
931 
932  // If we do not support the input floating point vector type, use the base
933  // one which will calculate as:
934  // ScalarizeCost + Num * Cost for fixed vector,
935  // InvalidCost for scalable vector.
936  if ((ValTy->getScalarSizeInBits() == 16 && !ST->hasVInstructionsF16()) ||
937  (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
938  (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
939  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
940  I);
941  switch (VecPred) {
942  // Support natively.
943  case CmpInst::FCMP_OEQ:
944  case CmpInst::FCMP_OGT:
945  case CmpInst::FCMP_OGE:
946  case CmpInst::FCMP_OLT:
947  case CmpInst::FCMP_OLE:
948  case CmpInst::FCMP_UNE:
949  return LT.first * 1;
950  // TODO: Other comparisons?
951  default:
952  break;
953  }
954  }
955 
956  // TODO: Add cost for scalar type.
957 
958  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
959 }
960 
// Extract/insert element cost: a base vmv.x.s / vmv.s.x plus a slide, with
// heavier sequences for i1 mask vectors and for i64 elements on RV32.
// NOTE(review): doxygen scrape — source line 961 (signature start, presumably
// getVectorInstrCost(unsigned Opcode, Type *Val, ...)) was elided; restore
// from upstream before compiling.
962  unsigned Index) {
963  assert(Val->isVectorTy() && "This must be a vector type");
964 
965  if (Opcode != Instruction::ExtractElement &&
966  Opcode != Instruction::InsertElement)
967  return BaseT::getVectorInstrCost(Opcode, Val, Index);
968 
969  // Legalize the type.
970  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
971 
972  // This type is legalized to a scalar type.
973  if (!LT.second.isVector())
974  return 0;
975 
976  // For unsupported scalable vector.
977  if (LT.second.isScalableVector() && !LT.first.isValid())
978  return LT.first;
979 
980  if (!isTypeLegal(Val))
981  return BaseT::getVectorInstrCost(Opcode, Val, Index);
982 
983  // In RVV, we could use vslidedown + vmv.x.s to extract element from vector
984  // and vslideup + vmv.s.x to insert element to vector.
985  unsigned BaseCost = 1;
986  // When insertelement we should add the index with 1 as the input of vslideup.
987  unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
988 
// Index == -1U means "unknown index"; the defaults above already assume a
// variable index held in a register.
989  if (Index != -1U) {
990  // The type may be split. For fixed-width vectors we can normalize the
991  // index to the new type.
992  if (LT.second.isFixedLengthVector()) {
993  unsigned Width = LT.second.getVectorNumElements();
994  Index = Index % Width;
995  }
996 
997  // We could extract/insert the first element without vslidedown/vslideup.
998  if (Index == 0)
999  SlideCost = 0;
1000  else if (Opcode == Instruction::InsertElement)
1001  SlideCost = 1; // With a constant index, we do not need to use addi.
1002  }
1003 
1004  // Mask vector extract/insert element is different from normal case.
1005  if (Val->getScalarSizeInBits() == 1) {
1006  // For extractelement, we need the following instructions:
1007  // vmv.v.i v8, 0
1008  // vmerge.vim v8, v8, 1, v0
1009  // vsetivli zero, 1, e8, m2, ta, mu (not count)
1010  // vslidedown.vx v8, v8, a0
1011  // vmv.x.s a0, v8
1012 
1013  // For insertelement, we need the following instructions:
1014  // vsetvli a2, zero, e8, m1, ta, mu (not count)
1015  // vmv.s.x v8, a0
1016  // vmv.v.i v9, 0
1017  // vmerge.vim v9, v9, 1, v0
1018  // addi a0, a1, 1
1019  // vsetvli zero, a0, e8, m1, tu, mu (not count)
1020  // vslideup.vx v9, v8, a1
1021  // vsetvli a0, zero, e8, m1, ta, mu (not count)
1022  // vand.vi v8, v9, 1
1023  // vmsne.vi v0, v8, 0
1024 
1025  // TODO: should we count these special vsetvlis?
1026  BaseCost = Opcode == Instruction::InsertElement ? 5 : 3;
1027  }
1028  // Extract i64 in the target that has XLEN=32 need more instruction.
1029  if (Val->getScalarType()->isIntegerTy() &&
1030  ST->getXLen() < Val->getScalarSizeInBits()) {
1031  // For extractelement, we need the following instructions:
1032  // vsetivli zero, 1, e64, m1, ta, mu (not count)
1033  // vslidedown.vx v8, v8, a0
1034  // vmv.x.s a0, v8
1035  // li a1, 32
1036  // vsrl.vx v8, v8, a1
1037  // vmv.x.s a1, v8
1038 
1039  // For insertelement, we need the following instructions:
1040  // vsetivli zero, 2, e32, m4, ta, mu (not count)
1041  // vmv.v.i v12, 0
1042  // vslide1up.vx v16, v12, a1
1043  // vslide1up.vx v12, v16, a0
1044  // addi a0, a2, 1
1045  // vsetvli zero, a0, e64, m4, tu, mu (not count)
1046  // vslideup.vx v8, v12, a2
1047 
1048  // TODO: should we count these special vsetvlis?
1049  BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
1050  }
1051  return BaseCost + SlideCost;
1052 }
1053 
// NOTE(review): extraction dropped the first signature line here; the symbol
// index identifies this as
//   InstructionCost RISCVTTIImpl::getArithmeticInstrCost(unsigned Opcode,
//       Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
//       TTI::OperandValueInfo Op2Info, ArrayRef<const Value *> Args,
//       const Instruction *CxtI)
// The Op1Info/Op2Info parameter line (doxygen line 1056) is also missing
// from this listing — confirm against upstream LLVM.
1055  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1057  ArrayRef<const Value *> Args, const Instruction *CxtI) {
1058 
1059  // TODO: Handle more cost kinds.
// NOTE(review): a guard condition (doxygen line 1060, presumably
// `if (CostKind != TTI::TCK_RecipThroughput)`) was lost in extraction;
// without it this early return would be unconditional — verify upstream.
1061  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
1062  Args, CxtI);
1063 
// Fixed-length vectors are only costed here when they are lowered via RVV.
1064  if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
1065  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
1066  Args, CxtI);
1067 
1068  // Skip if scalar size of Ty is bigger than ELEN.
1069  if (isa<VectorType>(Ty) && Ty->getScalarSizeInBits() > ST->getELEN())
1070  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
1071  Args, CxtI);
1072 
1073  // Legalize the type.
1074  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
1075 
1076  // TODO: Handle scalar type.
1077  if (!LT.second.isVector())
1078  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
1079  Args, CxtI);
1080 
1081 
// Helper: cost of materializing a constant operand. Splattable uniform
// constants are treated as free; otherwise the constant is assumed to be
// loaded from a constant pool.
1082  auto getConstantMatCost =
1083  [&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost {
1084  if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand))
1085  // Two sub-cases:
1086  // * Has a 5 bit immediate operand which can be splatted.
1087  // * Has a larger immediate which must be materialized in scalar register
1088  // We return 0 for both as we currently ignore the cost of materializing
1089  // scalar constants in GPRs.
1090  return 0;
1091 
1092  // Add a cost of address generation + the cost of the vector load. The
1093  // address is expected to be a PC relative offset to a constant pool entry
1094  // using auipc/addi.
// NOTE(review): the call head (doxygen line 1095, presumably
// `return getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),`)
// was lost in extraction — verify upstream.
1096  /*AddressSpace=*/0, CostKind);
1097  };
1098 
1099  // Add the cost of materializing any constant vectors required.
1100  InstructionCost ConstantMatCost = 0;
1101  if (Op1Info.isConstant())
1102  ConstantMatCost += getConstantMatCost(0, Op1Info);
1103  if (Op2Info.isConstant())
1104  ConstantMatCost += getConstantMatCost(1, Op2Info);
1105 
// Opcodes with a single obvious RVV lowering cost LMUL * number of
// legalized parts; anything else defers to the base implementation.
1106  switch (TLI->InstructionOpcodeToISD(Opcode)) {
1107  case ISD::ADD:
1108  case ISD::SUB:
1109  case ISD::AND:
1110  case ISD::OR:
1111  case ISD::XOR:
1112  case ISD::SHL:
1113  case ISD::SRL:
1114  case ISD::SRA:
1115  case ISD::MUL:
1116  case ISD::MULHS:
1117  case ISD::MULHU:
1118  case ISD::FADD:
1119  case ISD::FSUB:
1120  case ISD::FMUL:
1121  case ISD::FNEG: {
1122  return ConstantMatCost + getLMULCost(LT.second) * LT.first * 1;
1123  }
1124  default:
1125  return ConstantMatCost +
1126  BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
1127  Args, CxtI);
1128  }
1129 }
1130 
// NOTE(review): the signature lines (doxygen 1131-1133) were lost in
// extraction; the symbol index identifies this as
//   void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
//       TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
// Tunes loop-unrolling heuristics for RISC-V cores that opt out of the
// generic defaults.
1134  // TODO: More tuning on benchmarks and metrics with changes as needed
1135  // would apply to all settings below to enable performance.
1136 
1137 
// Cores that want the generic behavior take the base-class path entirely.
1138  if (ST->enableDefaultUnroll())
1139  return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);
1140 
1141  // Enable Upper bound unrolling universally, not dependant upon the conditions
1142  // below.
1143  UP.UpperBound = true;
1144 
1145  // Disable loop unrolling for Oz and Os.
1146  UP.OptSizeThreshold = 0;
1147  UP.PartialOptSizeThreshold = 0;
1148  if (L->getHeader()->getParent()->hasOptSize())
1149  return;
1150 
1151  SmallVector<BasicBlock *, 4> ExitingBlocks;
1152  L->getExitingBlocks(ExitingBlocks);
1153  LLVM_DEBUG(dbgs() << "Loop has:\n"
1154  << "Blocks: " << L->getNumBlocks() << "\n"
1155  << "Exit blocks: " << ExitingBlocks.size() << "\n");
1156 
1157  // Only allow another exit other than the latch. This acts as an early exit
1158  // as it mirrors the profitability calculation of the runtime unroller.
1159  if (ExitingBlocks.size() > 2)
1160  return;
1161 
1162  // Limit the CFG of the loop body for targets with a branch predictor.
1163  // Allowing 4 blocks permits if-then-else diamonds in the body.
1164  if (L->getNumBlocks() > 4)
1165  return;
1166 
1167  // Don't unroll vectorized loops, including the remainder loop
1168  if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
1169  return;
1170 
1171  // Scan the loop: don't unroll loops with calls as this could prevent
1172  // inlining.
1173  InstructionCost Cost = 0;
1174  for (auto *BB : L->getBlocks()) {
1175  for (auto &I : *BB) {
1176  // Initial setting - Don't unroll loops containing vectorized
1177  // instructions.
1178  if (I.getType()->isVectorTy())
1179  return;
1180 
// Calls to functions not lowered to real calls (e.g. some intrinsics) are
// fine; any other call/invoke disqualifies the loop.
1181  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1182  if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
1183  if (!isLoweredToCall(F))
1184  continue;
1185  }
1186  return;
1187  }
1188 
1189  SmallVector<const Value *> Operands(I.operand_values());
// NOTE(review): the accumulation statement (doxygen 1190-1191, presumably
// `Cost += getInstructionCost(&I, Operands, TTI::TCK_SizeAndLatency);`)
// was lost in extraction — verify upstream; Cost is otherwise never updated.
1192  }
1193  }
1194 
1195  LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
1196 
1197  UP.Partial = true;
1198  UP.Runtime = true;
1199  UP.UnrollRemainder = true;
1200  UP.UnrollAndJam = true;
// NOTE(review): doxygen line 1201 (an UnrollAndJamInnerLoopThreshold
// assignment, per the symbol index) was lost in extraction — verify upstream.
1202 
1203  // Force unrolling small loops can be very useful because of the branch
1204  // taken cost of the backedge.
1205  if (Cost < 12)
1206  UP.Force = true;
1207 }
1208 
// NOTE(review): the signature lines were lost in extraction; the symbol
// index identifies this as
//   void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
//                                            TTI::PeelingPreferences &PP)
// Pure delegation: RISC-V applies no target-specific peeling tuning.
1211  BaseT::getPeelingPreferences(L, SE, PP);
1212 }
1213 
// NOTE(review): the signature line (doxygen 1214) was lost in extraction;
// the symbol index identifies this as
//   unsigned RISCVTTIImpl::getRegUsageForType(Type *Ty)
// Returns the number of registers a value of Ty occupies.
1215  TypeSize Size = DL.getTypeSizeInBits(Ty);
1216  if (Ty->isVectorTy()) {
// Scalable vectors: one RVV register per RVVBitsPerBlock of the
// known-minimum size.
1217  if (Size.isScalable() && ST->hasVInstructions())
1218  return divideCeil(Size.getKnownMinValue(), RISCV::RVVBitsPerBlock);
1219 
// Fixed-length vectors lowered via RVV: number of VLEN-sized registers,
// using the guaranteed-minimum VLEN.
1220  if (ST->useRVVForFixedLengthVectors())
1221  return divideCeil(Size, ST->getRealMinVLen());
1222  }
1223 
// Scalars (and vectors not handled above) use the generic computation.
1224  return BaseT::getRegUsageForType(Ty);
1225 }
1226 
1227 unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
1228  // This interface is currently only used by SLP. Returning 1 (which is the
1229  // default value for SLPMaxVF) disables SLP. We currently have a cost modeling
1230  // problem w/ constant materialization which causes SLP to perform majorly
1231  // unprofitable transformations.
1232  // TODO: Figure out constant materialization cost modeling and remove.
1233  return SLPMaxVF;
1234 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::RISCVTTIImpl::getRegUsageForType
unsigned getRegUsageForType(Type *Ty)
Definition: RISCVTargetTransformInfo.cpp:1214
llvm::InstructionCost
Definition: InstructionCost.h:30
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:586
llvm::MVT::nxv4i64
@ nxv4i64
Definition: MachineValueType.h:238
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:446
llvm::RISCVTTIImpl::getMaxVScale
Optional< unsigned > getMaxVScale() const
Definition: RISCVTargetTransformInfo.cpp:199
llvm::RISCVMatInt::getIntMatCost
int getIntMatCost(const APInt &Val, unsigned Size, const FeatureBitset &ActiveFeatures, bool CompressionCost)
Definition: RISCVMatInt.cpp:380
llvm::BasicTTIImplBase< RISCVTTIImpl >::DL
const DataLayout & DL
Definition: TargetTransformInfoImpl.h:37
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:474
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:218
llvm::RISCVTTIImpl::shouldExpandReduction
bool shouldExpandReduction(const IntrinsicInst *II) const
Definition: RISCVTargetTransformInfo.cpp:185
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::RISCVTTIImpl::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Definition: RISCVTargetTransformInfo.cpp:605
llvm::CostTblEntryT
Cost Table Entry.
Definition: CostTable.h:25
llvm::MVT::nxv1i32
@ nxv1i32
Definition: MachineValueType.h:229
llvm::RISCVTTIImpl::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
Definition: RISCVTargetTransformInfo.cpp:172
llvm::RISCVTTIImpl::getPopcntSupport
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
Definition: RISCVTargetTransformInfo.cpp:180
llvm::MVT::nxv2f64
@ nxv2f64
Definition: MachineValueType.h:267
llvm::RISCVTTIImpl::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)
Definition: RISCVTargetTransformInfo.cpp:762
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
llvm::RISCVSubtarget::useRVVForFixedLengthVectors
bool useRVVForFixedLengthVectors() const
Definition: RISCVSubtarget.cpp:200
llvm::RISCVTTIImpl::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: RISCVTargetTransformInfo.cpp:1054
llvm::RISCVSubtarget::hasVInstructionsF16
bool hasVInstructionsF16() const
Definition: RISCVSubtarget.h:245
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
llvm::RISCVTTIImpl::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
Definition: RISCVTargetTransformInfo.cpp:320
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:586
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
llvm::MVT::nxv2f32
@ nxv2f32
Definition: MachineValueType.h:261
llvm::DataLayout::getTypeSizeInBits
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:673
llvm::BasicTTIImplBase< RISCVTTIImpl >::isTypeLegal
bool isTypeLegal(Type *Ty)
Definition: BasicTTIImpl.h:392
llvm::TargetTransformInfoImplCRTPBase< RISCVTTIImpl >::getInstructionCost
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfoImpl.h:1015
llvm::MVT::v16f64
@ v16f64
Definition: MachineValueType.h:194
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:328
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::TargetLoweringBase::isOperationCustom
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
Definition: TargetLowering.h:1225
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: MachineValueType.h:386
llvm::MVT::nxv1f64
@ nxv1f64
Definition: MachineValueType.h:266
llvm::RISCVSubtarget::hasVInstructions
bool hasVInstructions() const
Definition: RISCVSubtarget.h:243
llvm::RISCVSubtarget::getRealMaxVLen
unsigned getRealMaxVLen() const
Definition: RISCVSubtarget.h:230
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:153
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:965
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:493
llvm::RISCVTTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: RISCVTargetTransformInfo.cpp:961
llvm::RISCVTTIImpl::isLegalMaskedScatter
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
Definition: RISCVTargetTransformInfo.h:225
llvm::MVT::nxv2i64
@ nxv2i64
Definition: MachineValueType.h:237
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:486
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:819
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
SLPMaxVF
static cl::opt< unsigned > SLPMaxVF("riscv-v-slp-max-vf", cl::desc("Result used for getMaximumVF query which is used exclusively by " "SLP vectorizer. Defaults to 1 which disables SLP."), cl::init(1), cl::Hidden)
llvm::RISCVTTIImpl::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
Definition: RISCVTargetTransformInfo.cpp:785
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:470
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:529
llvm::RISCVTTIImpl::isLegalMaskedGather
bool isLegalMaskedGather(Type *DataType, Align Alignment)
Definition: RISCVTargetTransformInfo.h:222
llvm::Optional< unsigned >
llvm::MVT::nxv4f64
@ nxv4f64
Definition: MachineValueType.h:268
llvm::RISCVTTIImpl::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
Definition: RISCVTargetTransformInfo.cpp:55
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:458
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:190
RISCVMatInt.h
llvm::TargetTransformInfo::OperandValueInfo
Definition: TargetTransformInfo.h:924
llvm::CmpInst::FCMP_OGT
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:724
llvm::MVT::nxv8i16
@ nxv8i16
Definition: MachineValueType.h:225
llvm::RISCVTTIImpl::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
Definition: RISCVTargetTransformInfo.cpp:865
llvm::RISCVTargetLowering::canSplatOperand
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
Definition: RISCVISelLowering.cpp:1315
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:202
llvm::APIntOps::umin
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2157
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:888
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVTargetLowering::computeVLMAX
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
Definition: RISCVISelLowering.h:566
llvm::MVT::isScalableVector
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
Definition: MachineValueType.h:393
llvm::TargetTransformInfo::requiresOrderedReduction
static bool requiresOrderedReduction(Optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
Definition: TargetTransformInfo.h:1267
floor
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
Definition: README-FPStack.txt:54
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::TargetTransformInfoImplBase::getDataLayout
const DataLayout & getDataLayout() const
Definition: TargetTransformInfoImpl.h:46
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
TargetLowering.h
llvm::RISCVSubtarget::hasVInstructionsF64
bool hasVInstructionsF64() const
Definition: RISCVSubtarget.h:249
llvm::MVT::nxv16f32
@ nxv16f32
Definition: MachineValueType.h:264
llvm::BasicTTIImplBase< RISCVTTIImpl >::getTypeLegalizationCost
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: BasicTTIImpl.h:789
llvm::MVT::v8f64
@ v8f64
Definition: MachineValueType.h:193
llvm::RISCVTTIImpl::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: RISCVTargetTransformInfo.cpp:1209
llvm::BasicTTIImplBase::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: BasicTTIImpl.h:524
llvm::RISCVTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: RISCVTargetTransformInfo.cpp:214
llvm::PowerOf2Floor
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition: MathExtras.h:623
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:763
llvm::BasicTTIImplBase< RISCVTTIImpl >::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:1187
InlinePriorityMode::Cost
@ Cost
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:887
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1139
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:482
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:56
llvm::isShiftedMask_64
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:452
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::RISCVTTIImpl::getSpliceCost
InstructionCost getSpliceCost(VectorType *Tp, int Index)
Definition: RISCVTargetTransformInfo.cpp:234
llvm::BasicTTIImplBase< RISCVTTIImpl >::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:2284
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:769
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:692
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:246
llvm::RISCVSubtarget::getELEN
unsigned getELEN() const
Definition: RISCVSubtarget.h:222
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:547
llvm::BasicTTIImplBase< RISCVTTIImpl >::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:825
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:488
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:188
llvm::MVT::v4f64
@ v4f64
Definition: MachineValueType.h:192
llvm::BasicTTIImplBase< RISCVTTIImpl >::getRegUsageForType
unsigned getRegUsageForType(Type *Ty)
Definition: BasicTTIImpl.h:397
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::DataLayout::getABITypeAlign
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:829
llvm::Operator::getOpcode
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:42
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:773
llvm::MVT::nxv4i8
@ nxv4i8
Definition: MachineValueType.h:216
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
llvm::MVT::nxv4f32
@ nxv4f32
Definition: MachineValueType.h:262
llvm::CmpInst::FCMP_OEQ
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:723
llvm::RISCVSubtarget::hasStdExtZbb
bool hasStdExtZbb() const
Definition: RISCVSubtarget.h:168
llvm::CmpInst::FCMP_OLT
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:726
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
llvm::RISCVTTIImpl::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=None)
Definition: RISCVTargetTransformInfo.cpp:243
llvm::RISCVSubtarget::enableDefaultUnroll
bool enableDefaultUnroll() const
Definition: RISCVSubtarget.h:204
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:965
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::TargetTransformInfo::OperandValueInfo::isUniform
bool isUniform() const
Definition: TargetTransformInfo.h:931
llvm::RISCVSubtarget::getRealMinVLen
unsigned getRealMinVLen() const
Definition: RISCVSubtarget.h:226
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::BasicTTIImplBase< RISCVTTIImpl >::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1137
llvm::BasicTTIImplBase< RISCVTTIImpl >::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:969
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:100
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:87
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:120
llvm::MVT::nxv4i16
@ nxv4i16
Definition: MachineValueType.h:224
llvm::Log2_32_Ceil
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:560
llvm::Instruction::isCommutative
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
Definition: Instruction.cpp:807
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:586
llvm::RISCVTTIImpl::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
Definition: RISCVTargetTransformInfo.cpp:331
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:74
llvm::MVT::nxv16i16
@ nxv16i16
Definition: MachineValueType.h:226
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:210
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:889
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:86
llvm::MVT::v4i64
@ v4i64
Definition: MachineValueType.h:133
llvm::RISCV::RVVBitsPerBlock
static constexpr unsigned RVVBitsPerBlock
Definition: TargetParser.h:161
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::MVT::nxv16i8
@ nxv16i8
Definition: MachineValueType.h:218
llvm::cl::opt
Definition: CommandLine.h:1412
llvm::RISCVTTIImpl::getVScaleForTuning
Optional< unsigned > getVScaleForTuning() const
Definition: RISCVTargetTransformInfo.cpp:205
llvm::divideCeil
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:683
llvm::MVT::nxv8i64
@ nxv8i64
Definition: MachineValueType.h:239
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:89
llvm::RISCVVType::decodeVLMUL
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
Definition: RISCVBaseInfo.cpp:147
llvm::CostTableLookup
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType >> Tbl, int ISD, MVT Ty)
Find in cost table.
Definition: CostTable.h:35
VectorIntrinsicCostTable
static const CostTblEntry VectorIntrinsicCostTable[]
Definition: RISCVTargetTransformInfo.cpp:360
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:102
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:131
uint64_t
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:820
llvm::MVT::v16f32
@ v16f32
Definition: MachineValueType.h:179
llvm::BasicTTIImplBase< RISCVTTIImpl >::getVScaleForTuning
Optional< unsigned > getVScaleForTuning() const
Definition: BasicTTIImpl.h:702
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:417
I
#define I(x, y, z)
Definition: MD5.cpp:58
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:154
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::BasicTTIImplBase< RISCVTTIImpl >::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=None)
Definition: BasicTTIImpl.h:939
llvm::RISCVTTIImpl::isLegalMaskedLoadStore
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment)
Definition: RISCVTargetTransformInfo.h:173
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:170
llvm::RISCVTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: RISCVTargetTransformInfo.cpp:1131
llvm::RISCVTTIImpl::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: RISCVTargetTransformInfo.cpp:674
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::ISD::MULHS
@ MULHS
Definition: ISDOpcodes.h:638
llvm::MVT::nxv16i32
@ nxv16i32
Definition: MachineValueType.h:233
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:919
RVVRegisterWidthLMUL
static cl::opt< unsigned > RVVRegisterWidthLMUL("riscv-v-register-bit-width-lmul", cl::desc("The LMUL to use for getRegisterBitWidth queries. Affects LMUL used " "by autovectorized code. Fractional LMULs are not supported."), cl::init(1), cl::Hidden)
llvm::CmpInst::FCMP_OGE
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:725
llvm::MVT::nxv4i32
@ nxv4i32
Definition: MachineValueType.h:231
llvm::BasicTTIImplBase< RISCVTTIImpl >::getMaxVScale
Optional< unsigned > getMaxVScale() const
Definition: BasicTTIImpl.h:701
llvm::TargetTransformInfoImplBase::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Definition: TargetTransformInfoImpl.h:123
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::BasicTTIImplBase< RISCVTTIImpl >::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1231
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition: TargetTransformInfo.h:900
llvm::LinearPolySize::getKnownMinValue
ScalarTy getKnownMinValue() const
Returns the minimum value this size can represent.
Definition: TypeSize.h:296
llvm::MVT::nxv1i64
@ nxv1i64
Definition: MachineValueType.h:236
llvm::RISCVSubtarget::hasStdExtZba
bool hasStdExtZba() const
Definition: RISCVSubtarget.h:167
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::CmpInst::isIntPredicate
bool isIntPredicate() const
Definition: InstrTypes.h:828
llvm::SystemZ::VectorBits
const unsigned VectorBits
Definition: SystemZ.h:154
llvm::MVT::nxv2i32
@ nxv2i32
Definition: MachineValueType.h:230
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:244
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2147
llvm::MVT::nxv1f32
@ nxv1f32
Definition: MachineValueType.h:260
llvm::RISCVTTIImpl::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: RISCVTargetTransformInfo.cpp:878
llvm::ArrayRef< int >
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
llvm::RISCVTTIImpl::getExtendedReductionCost
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
Definition: RISCVTargetTransformInfo.cpp:815
llvm::countTrailingZeros
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: MathExtras.h:152
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:110
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:222
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:168
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
trunc
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g trunc
Definition: README-FPStack.txt:63
llvm::MVT::nxv1i8
@ nxv1i8
Definition: MachineValueType.h:214
llvm::TargetLoweringBase::InstructionOpcodeToISD
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Definition: TargetLoweringBase.cpp:1785
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:112
llvm::BasicTTIImplBase< RISCVTTIImpl >::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:596
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:392
llvm::MVT::v8i64
@ v8i64
Definition: MachineValueType.h:134
llvm::ISD::XOR
@ XOR
Definition: ISDOpcodes.h:668
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:645
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:121
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:164
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2162
llvm::MVT::nxv2i16
@ nxv2i16
Definition: MachineValueType.h:223
CostTable.h
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:484
llvm::RISCVSubtarget::hasVInstructionsF32
bool hasVInstructionsF32() const
Definition: RISCVSubtarget.h:247
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:924
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:101
llvm::TypeSize
Definition: TypeSize.h:435
llvm::BasicTTIImplBase< RISCVTTIImpl >::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:1275
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::LinearPolySize< TypeSize >::getScalable
static TypeSize getScalable(ScalarTy MinVal)
Definition: TypeSize.h:286
llvm::BasicTTIImplBase< RISCVTTIImpl >::isLegalAddImmediate
bool isLegalAddImmediate(int64_t imm)
Definition: BasicTTIImpl.h:306
llvm::RISCVSubtarget::getXLen
unsigned getXLen() const
Definition: RISCVSubtarget.h:212
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::XCoreISD::LMUL
@ LMUL
Definition: XCoreISelLowering.h:59
llvm::MVT::nxv8i8
@ nxv8i8
Definition: MachineValueType.h:217
llvm::RISCVTargetLowering::getLMUL
static RISCVII::VLMUL getLMUL(MVT VT)
Definition: RISCVISelLowering.cpp:1557
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:116
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:433
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:774
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::MVT::nxv8i32
@ nxv8i32
Definition: MachineValueType.h:232
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:965
llvm::MVT::nxv1i16
@ nxv1i16
Definition: MachineValueType.h:222
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:74
llvm::BasicTTIImplBase< RISCVTTIImpl >::getExtendedReductionCost
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:2357
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::BasicTTIImplBase< RISCVTTIImpl >::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
Definition: BasicTTIImpl.h:2294
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:871
llvm::RISCVTTIImpl::getStoreImmCost
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind)
Return the cost of materializing an immediate for a value operand of a store instruction.
Definition: RISCVTargetTransformInfo.cpp:841
llvm::BasicTTIImplBase< RISCVTTIImpl >::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1282
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:391
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:151
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:691
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:965
llvm::ISD::MUL
@ MUL
Definition: ISDOpcodes.h:241
llvm::CmpInst::FCMP_UNE
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:736
llvm::BasicTTIImplBase< RISCVTTIImpl >::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:1435
llvm::getBooleanLoopAttribute
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
Definition: LoopInfo.cpp:1085
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:693
canUseShiftPair
static bool canUseShiftPair(Instruction *Inst, const APInt &Imm)
Definition: RISCVTargetTransformInfo.cpp:73
TargetTransformInfo.h
llvm::CmpInst::FCMP_OLE
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:727
llvm::ISD::MULHU
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:637
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:439
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:911
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
BasicTTIImpl.h
llvm::cl::desc
Definition: CommandLine.h:413
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:760
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:88
llvm::TargetTransformInfo::OperandValueInfo::isConstant
bool isConstant() const
Definition: TargetTransformInfo.h:928
llvm::MVT::nxv2i8
@ nxv2i8
Definition: MachineValueType.h:215
llvm::MVT::v8f32
@ v8f32
Definition: MachineValueType.h:174
RISCVTargetTransformInfo.h
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:98
llvm::MVT::v16i64
@ v16i64
Definition: MachineValueType.h:135
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1297
llvm::RISCVTTIImpl::getMaximumVF
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
Definition: RISCVTargetTransformInfo.cpp:1227
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:852
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:219
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2152
llvm::MVT::nxv8f64
@ nxv8f64
Definition: MachineValueType.h:269
llvm::RISCVTTIImpl::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
Definition: RISCVTargetTransformInfo.cpp:97
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39
llvm::MVT::nxv8f32
@ nxv8f32
Definition: MachineValueType.h:263