LLVM  16.0.0git
WebAssemblyTargetTransformInfo.cpp
Go to the documentation of this file.
1 //===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the WebAssembly-specific TargetTransformInfo
11 /// implementation.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/CodeGen/CostTable.h"
17 #include "llvm/Support/Debug.h"
18 using namespace llvm;
19 
20 #define DEBUG_TYPE "wasmtti"
21 
23 WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
24  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
26 }
27 
28 unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
29  unsigned Result = BaseT::getNumberOfRegisters(ClassID);
30 
31  // For SIMD, use at least 16 registers, as a rough guess.
32  bool Vector = (ClassID == 1);
33  if (Vector)
34  Result = std::max(Result, 16u);
35 
36  return Result;
37 }
38 
41  switch (K) {
43  return TypeSize::getFixed(64);
45  return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
47  return TypeSize::getScalable(0);
48  }
49 
50  llvm_unreachable("Unsupported register kind");
51 }
52 
54  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
57  const Instruction *CxtI) {
58 
61  Opcode, Ty, CostKind, Op1Info, Op2Info);
62 
63  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
64  switch (Opcode) {
65  case Instruction::LShr:
66  case Instruction::AShr:
67  case Instruction::Shl:
68  // SIMD128's shifts currently only accept a scalar shift count. For each
69  // element, we'll need to extract, op, insert. The following is a rough
70  // approximation.
71  if (!Op2Info.isUniform())
72  Cost =
73  cast<FixedVectorType>(VTy)->getNumElements() *
75  getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
77  break;
78  }
79  }
80  return Cost;
81 }
82 
84  Type *Val,
85  unsigned Index) {
88 
89  // SIMD128's insert/extract currently only take constant indices.
90  if (Index == -1u)
92 
93  return Cost;
94 }
95 
97  const Function *Callee) const {
98  // Allow inlining only when the Callee has a subset of the Caller's
99  // features. In principle, we should be able to inline regardless of any
100  // features because WebAssembly supports features at module granularity, not
101  // function granularity, but without this restriction it would be possible for
102  // a module to "forget" about features if all the functions that used them
103  // were inlined.
104  const TargetMachine &TM = getTLI()->getTargetMachine();
105 
106  const FeatureBitset &CallerBits =
107  TM.getSubtargetImpl(*Caller)->getFeatureBits();
108  const FeatureBitset &CalleeBits =
109  TM.getSubtargetImpl(*Callee)->getFeatureBits();
110 
111  return (CallerBits & CalleeBits) == CalleeBits;
112 }
113 
116  OptimizationRemarkEmitter *ORE) const {
117  // Scan the loop: don't unroll loops with calls. This is a standard approach
118  // for most (all?) targets.
119  for (BasicBlock *BB : L->blocks())
120  for (Instruction &I : *BB)
121  if (isa<CallInst>(I) || isa<InvokeInst>(I))
122  if (const Function *F = cast<CallBase>(I).getCalledFunction())
123  if (isLoweredToCall(F))
124  return;
125 
126  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
127  // the various microarchitectures that use the BasicTTI implementation and
128  // has been selected through heuristics across multiple cores and runtimes.
129  UP.Partial = UP.Runtime = UP.UpperBound = true;
130  UP.PartialThreshold = 30;
131 
132  // Avoid unrolling when optimizing for size.
133  UP.OptSizeThreshold = 0;
135 
136  // Set number of instructions optimized when "back edge"
137  // becomes "fall through" to default value of 2.
138  UP.BEInsns = 2;
139 }
140 
142  return getST()->hasTailCall();
143 }
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:583
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:464
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:443
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number of loop iterations is not known at compile time).
Definition: TargetTransformInfo.h:471
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:217
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:245
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:439
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::WebAssemblySubtarget::hasTailCall
bool hasTailCall() const
Definition: WebAssemblySubtarget.h:102
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:583
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:962
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminate small constant-trip-count loops).
Definition: TargetTransformInfo.h:467
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Vector
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:40
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
WebAssemblyTargetTransformInfo.h
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:458
llvm::TargetTransformInfo::OperandValueInfo
Definition: TargetTransformInfo.h:921
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::TargetTransformInfoImplBase::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: TargetTransformInfoImpl.h:408
llvm::BasicTTIImplBase::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:1181
InlinePriorityMode::Cost
@ Cost
llvm::WebAssemblyTTIImpl::supportsTailCalls
bool supportsTailCalls() const
Definition: WebAssemblyTargetTransformInfo.cpp:141
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:194
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::BasicTTIImplBase::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:819
llvm::Instruction
Definition: Instruction.h:42
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:962
llvm::TargetTransformInfo::OperandValueInfo::isUniform
bool isUniform() const
Definition: TargetTransformInfo.h:928
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
llvm::WebAssemblyTTIImpl::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: WebAssemblyTargetTransformInfo.cpp:53
llvm::WebAssemblyTTIImpl::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: WebAssemblyTargetTransformInfo.cpp:83
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:414
I
#define I(x, y, z)
Definition: MD5.cpp:58
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:154
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::TargetTransformInfoImplBase::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Definition: TargetTransformInfoImpl.h:121
llvm::WebAssemblyTTIImpl::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Definition: WebAssemblyTargetTransformInfo.cpp:114
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: APInt.h:32
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:346
CostTable.h
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:481
llvm::TypeSize
Definition: TypeSize.h:435
llvm::LinearPolySize< TypeSize >::getScalable
static TypeSize getScalable(ScalarTy MinVal)
Definition: TypeSize.h:286
llvm::WebAssemblyTTIImpl::getPopcntSupport
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const
Definition: WebAssemblyTargetTransformInfo.cpp:23
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:962
llvm::WebAssemblyTTIImpl::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: WebAssemblyTargetTransformInfo.cpp:39
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:962
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:436
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:244
llvm::WebAssemblyTTIImpl::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: WebAssemblyTargetTransformInfo.cpp:28
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::WebAssemblyTTIImpl::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: WebAssemblyTargetTransformInfo.cpp:96
Debug.h