LLVM  15.0.0git
ReplaceWithVeclib.cpp
Go to the documentation of this file.
1 //=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics
10 // with vector operands) with matching calls to functions from a vector
11 // library (e.g., libmvec, SVML) according to TargetLibraryInfo.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/Passes.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/InstIterator.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "replace-with-veclib"
31 
32 STATISTIC(NumCallsReplaced,
33  "Number of calls to intrinsics that have been replaced.");
34 
35 STATISTIC(NumTLIFuncDeclAdded,
36  "Number of vector library function declarations added.");
37 
38 STATISTIC(NumFuncUsedAdded,
39  "Number of functions added to `llvm.compiler.used`");
40 
/// Replace the call \p CI with a call to the function named \p TLIName.
///
/// If the module that contains \p CI has no function called \p TLIName, a
/// declaration is created with external linkage, the function type of the
/// currently called function and a copy of its attributes; the new
/// declaration is also appended to `llvm.compiler.used`. A new call is then
/// built, all uses of \p CI are redirected to it, and fast-math flags are
/// copied over when the new call is an FPMathOperator.
///
/// \p CI itself is not erased here; runImpl() collects the replaced calls and
/// erases them once it has finished walking the function.
///
/// \param CI      The call to replace. CI.getCalledFunction() is dereferenced
///                without a null check, so it must be a direct call.
/// \param TLIName Name of the vector library function to call instead.
/// \returns true on every path visible in this listing.
41 static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
42  Module *M = CI.getModule();
43 
44  Function *OldFunc = CI.getCalledFunction();
45 
46  // Check if the vector library function is already declared in this module,
47  // otherwise insert it.
48  Function *TLIFunc = M->getFunction(TLIName);
49  if (!TLIFunc) {
50  TLIFunc = Function::Create(OldFunc->getFunctionType(),
51  Function::ExternalLinkage, TLIName, *M);
52  TLIFunc->copyAttributesFrom(OldFunc);
53 
54  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
55  << TLIName << "` of type `" << *(TLIFunc->getType())
56  << "` to module.\n");
57 
58  ++NumTLIFuncDeclAdded;
59 
60  // Add the freshly created function to llvm.compiler.used,
61  // similar to how it is done in InjectTLIMappings.
62  appendToCompilerUsed(*M, {TLIFunc});
63 
64  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
65  << "` to `@llvm.compiler.used`.\n");
66  ++NumFuncUsedAdded;
67  }
68 
69  // Replace the call to the vector intrinsic with a call
70  // to the corresponding function from the vector library.
    // NOTE(review): the statements that declare `IRBuilder`, `Args` and
    // `OpBundles` are not present in this listing (its numbering jumps from
    // 70 to 73 and from 73 to 75); consult the real source file for them.
73  // Preserve the operand bundles.
75  CI.getOperandBundlesAsDefs(OpBundles);
76  CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args, OpBundles);
    // NOTE(review): the type check below runs only after the call has been
    // created, and only in builds where assert() is active. A pre-existing
    // function named TLIName with a different type is not rejected on any
    // other path visible here.
77  assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() &&
78  "Expecting function types to be identical");
79  CI.replaceAllUsesWith(Replacement);
80  if (isa<FPMathOperator>(Replacement)) {
81  // Preserve fast math flags for FP math.
82  Replacement->copyFastMathFlags(&CI);
83  }
84 
85  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
86  << OldFunc->getName() << "` with call to `" << TLIName
87  << "`.\n");
88  ++NumCallsReplaced;
89  return true;
90 }
91 
/// Try to replace the vector intrinsic call \p CI with a call to a matching
/// vector library function taken from \p TLI.
///
/// NOTE(review): the first line of this definition (listing line 92) is
/// missing from this listing; the cross-reference section gives the signature
/// as `static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
/// CallInst &CI)`.
///
/// The call is left untouched and false is returned when:
///  - CI.getCalledFunction() is null or the callee is not an intrinsic,
///  - an argument that is not flagged by isVectorIntrinsicWithScalarOpAtArg()
///    is not a vector, or is a scalable vector,
///  - two vector arguments have different element counts,
///  - \p TLI reports the reconstructed scalar name as not vectorizable, or
///  - \p TLI has no mapping for that scalar name at the operands' width.
/// Otherwise the result of replaceWithTLIFunction() is returned.
93  CallInst &CI) {
94  if (!CI.getCalledFunction()) {
95  return false;
96  }
97 
98  auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID();
99  if (IntrinsicID == Intrinsic::not_intrinsic) {
100  // Replacement is only performed for intrinsic functions
101  return false;
102  }
103 
104  // Convert vector arguments to scalar type and check that
105  // all vector operands have identical vector width.
    // NOTE(review): the declaration of `VF` (listing line 106) is missing
    // from this listing. The `VF.isNonZero()` test below treats a zero count
    // as "no vector operand seen yet", so it is presumably initialised to a
    // zero element count -- confirm against the real source file.
107  SmallVector<Type *> ScalarTypes;
108  for (auto Arg : enumerate(CI.args())) {
109  auto *ArgType = Arg.value()->getType();
110  // Vector calls to intrinsics can still have
111  // scalar operands for specific arguments.
112  if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) {
113  ScalarTypes.push_back(ArgType);
114  } else {
115  // The argument in this place should be a vector if
116  // this is a call to a vector intrinsic.
117  auto *VectorArgTy = dyn_cast<VectorType>(ArgType);
118  if (!VectorArgTy) {
119  // The argument is not a vector, do not perform
120  // the replacement.
121  return false;
122  }
123  ElementCount NumElements = VectorArgTy->getElementCount();
124  if (NumElements.isScalable()) {
125  // The current implementation does not support
126  // scalable vectors.
127  return false;
128  }
129  if (VF.isNonZero() && VF != NumElements) {
130  // The different arguments differ in vector size.
131  return false;
132  } else {
    // Either the first vector operand (VF still zero) or one whose
    // element count equals the count recorded so far.
133  VF = NumElements;
134  }
135  ScalarTypes.push_back(VectorArgTy->getElementType());
136  }
137  }
138 
139  // Try to reconstruct the name for the scalar version of this
140  // intrinsic using the intrinsic ID and the argument types
141  // converted to scalar above.
142  std::string ScalarName;
143  if (Intrinsic::isOverloaded(IntrinsicID)) {
144  ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes, CI.getModule());
145  } else {
146  ScalarName = Intrinsic::getName(IntrinsicID).str();
147  }
148 
149  if (!TLI.isFunctionVectorizable(ScalarName)) {
150  // The TargetLibraryInfo does not contain a vectorized version of
151  // the scalar function.
152  return false;
153  }
154 
155  // Try to find the mapping for the scalar version of this intrinsic
156  // and the exact vector width of the call operands in the
157  // TargetLibraryInfo.
158  const std::string TLIName =
159  std::string(TLI.getVectorizedFunction(ScalarName, VF));
160 
    // This debug message is printed after the lookup above has already been
    // performed; it reports what was looked up, not what is about to be.
161  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
162  << ScalarName << "` and vector width " << VF << ".\n");
163 
164  if (!TLIName.empty()) {
165  // Found the correct mapping in the TargetLibraryInfo,
166  // replace the call to the intrinsic with a call to
167  // the vector library function.
168  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
169  << "`.\n");
170  return replaceWithTLIFunction(CI, TLIName);
171  }
172 
    // An empty name means TLI has no mapping for this scalar name and VF.
173  return false;
174 }
175 
/// Shared driver for both pass-manager front ends.
///
/// Visits every instruction of \p F, offers each CallInst to
/// replaceWithCallToVeclib(), and afterwards erases the calls that were
/// replaced.
///
/// \param TLI Library information passed through to replaceWithCallToVeclib().
/// \param F   Function whose instructions are scanned and modified.
/// \returns true if at least one call was replaced, false otherwise.
176 static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
177  bool Changed = false;
    // Replaced calls are only recorded during the walk and erased afterwards,
    // presumably so the instruction range is not mutated by erasure while it
    // is being iterated.
178  SmallVector<CallInst *> ReplacedCalls;
179  for (auto &I : instructions(F)) {
180  if (auto *CI = dyn_cast<CallInst>(&I)) {
181  if (replaceWithCallToVeclib(TLI, *CI)) {
182  ReplacedCalls.push_back(CI);
183  Changed = true;
184  }
185  }
186  }
187  // Erase the calls to the intrinsics that have been replaced
188  // with calls to the vector library.
189  for (auto *CI : ReplacedCalls) {
190  CI->eraseFromParent();
191  }
192  return Changed;
193 }
194 
195 ////////////////////////////////////////////////////////////////////////////////
196 // New pass manager implementation.
197 ////////////////////////////////////////////////////////////////////////////////
// New-pass-manager entry point. The cross-reference section of this listing
// gives it as `PreservedAnalyses ReplaceWithVeclib::run(Function &F,
// FunctionAnalysisManager &AM)`.
// NOTE(review): the signature and the declarations of `TLI` and `PA`
// (listing lines 198-200 and 203), plus listing lines 205-209, are missing
// from this listing; the full set of analyses marked as preserved cannot be
// read from here.
201  auto Changed = runImpl(TLI, F);
202  if (Changed) {
    // Something was replaced: return the explicitly built preserved set.
204  PA.preserveSet<CFGAnalyses>();
210  return PA;
211  } else {
212  // The pass did not replace any calls, hence it preserves all analyses.
213  return PreservedAnalyses::all();
214  }
215 }
216 
217 ////////////////////////////////////////////////////////////////////////////////
218 // Legacy PM Implementation.
219 ////////////////////////////////////////////////////////////////////////////////
// Legacy-pass-manager entry point. The cross-reference section of this
// listing gives it as `bool ReplaceWithVeclibLegacy::runOnFunction(Function
// &F)`; its signature line (listing line 220) is missing from this listing.
// Fetches the TargetLibraryInfo for F from TargetLibraryInfoWrapperPass and
// returns whatever runImpl() reports.
221  const TargetLibraryInfo &TLI =
222  getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
223  return runImpl(TLI, F);
224 }
225 
227  AU.setPreservesCFG();
236 }
237 
238 ////////////////////////////////////////////////////////////////////////////////
239 // Legacy Pass manager initialization
240 ////////////////////////////////////////////////////////////////////////////////
242 
244  "Replace intrinsics with calls to vector library", false,
245  false)
248  "Replace intrinsics with calls to vector library", false,
249  false)
250 
252  return new ReplaceWithVeclibLegacy();
253 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::Intrinsic::isOverloaded
bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
Definition: Function.cpp:1387
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2115
llvm::LoopAccessLegacyAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:770
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::CallBase::getOperandBundlesAsDefs
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Definition: Instructions.cpp:396
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:65
llvm::ElementCount
Definition: TypeSize.h:390
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
InstIterator.h
llvm::Function
Definition: Function.h:60
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
replaceWithCallToVeclib
static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, CallInst &CI)
Definition: ReplaceWithVeclib.cpp:92
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are pairs (A,B) such that A is the 0-based ...
Definition: STLExtras.h:2047
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:879
llvm::IRBuilder<>
llvm::TargetLibraryInfo::isFunctionVectorizable
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
Definition: TargetLibraryInfo.h:331
OptimizationRemarkEmitter.h
llvm::isVectorIntrinsicWithScalarOpAtArg
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:101
GlobalsModRef.h
llvm::ReplaceWithVeclibLegacy::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: ReplaceWithVeclib.cpp:226
llvm::LoopAccessAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:812
to
Should compile to
Definition: README.txt:449
llvm::DemandedBitsAnalysis
An analysis that produces DemandedBits for a function.
Definition: DemandedBits.h:123
llvm::ReplaceWithVeclibLegacy::ID
static char ID
Definition: ReplaceWithVeclib.h:30
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
STLExtras.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: ReplaceWithVeclib.cpp:30
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:298
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
replaceWithTLIFunction
static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName)
Definition: ReplaceWithVeclib.cpp:41
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
runImpl
static bool runImpl(const TargetLibraryInfo &TLI, Function &F)
Definition: ReplaceWithVeclib.cpp:176
llvm::createReplaceWithVeclibLegacyPass
FunctionPass * createReplaceWithVeclibLegacyPass()
Definition: ReplaceWithVeclib.cpp:251
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:141
llvm::appendToCompilerUsed
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
Definition: ModuleUtils.cpp:109
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::Function::copyAttributesFrom
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
Definition: Function.cpp:711
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2145
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:245
llvm::LinearPolySize< ElementCount >::getFixed
static ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
Passes.h
VectorUtils.h
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(ReplaceWithVeclibLegacy, DEBUG_TYPE, "Replace intrinsics with calls to vector library", false, false) INITIALIZE_PASS_END(ReplaceWithVeclibLegacy
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:468
intrinsics
expand Expand reduction intrinsics
Definition: ExpandReductions.cpp:198
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::Function::Create
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:137
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::Function::getIntrinsicID
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:205
library
Replace intrinsics with calls to vector library
Definition: ReplaceWithVeclib.cpp:248
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:529
llvm::ReplaceWithVeclibLegacy
Definition: ReplaceWithVeclib.h:29
llvm::Instruction::copyFastMathFlags
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
Definition: Instruction.cpp:244
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
DemandedBits.h
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::ReplaceWithVeclibLegacy::runOnFunction
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Definition: ReplaceWithVeclib.cpp:220
llvm::DemandedBitsWrapperPass
Definition: DemandedBits.h:102
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:175
llvm::GlobalValue::ExternalLinkage
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
ReplaceWithVeclib.h
ModuleUtils.h
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1347
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:148
calls
amdgpu Simplify well known AMD library calls
Definition: AMDGPULibCalls.cpp:179
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:270
llvm::ReplaceWithVeclib::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: ReplaceWithVeclib.cpp:198
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
llvm::UnivariateLinearPolyBase::isNonZero
bool isNonZero() const
Definition: TypeSize.h:230
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:443
llvm::IRBuilderBase::CreateCall
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2229
llvm::CallBase::args
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1332
llvm::TargetLibraryInfo::getVectorizedFunction
StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const
Definition: TargetLibraryInfo.h:337