LLVM  15.0.0git
PartiallyInlineLibCalls.cpp
Go to the documentation of this file.
1 //===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass tries to partially inline the fast path of well-known library
10 // functions, such as using square-root instructions for cases where sqrt()
11 // does not need to set errno.
12 //
13 //===----------------------------------------------------------------------===//
14 
19 #include "llvm/IR/Dominators.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/InitializePasses.h"
23 #include "llvm/Transforms/Scalar.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "partially-inline-libcalls"
29 
// Debug counter that gates each sqrt transformation: optimizeSQRT consults it
// via DebugCounter::shouldExecute(PILCounter) and bails out when it says no.
30 DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform",
31  "Controls transformations in partially-inline-libcalls");
32 
// Partially inline one sqrt/sqrtf library call.
//
// The original call is kept as the fast path and re-attributed readnone; a
// clone of it is placed in a new "call.sqrt" block as the slow path, and a phi
// in the split-off tail block merges both results and replaces all uses of the
// original call.
//
// Returns false, leaving the IR untouched, when the call already only reads
// memory or when the PILCounter debug counter declines the transformation.
// Returns true after rewriting, in which case BB has been pointed at the join
// block.
//
// NOTE(review): this listing has lost some lines (the embedded numbering jumps
// 33->35, 60->62 and 89->91). The parameters CurrBB and BB and the
// declarations of LibCallTerm and FCmp are used below but not shown; confirm
// against the upstream file before editing.
33 static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
35  const TargetTransformInfo *TTI, DomTreeUpdater *DTU) {
36  // There is no need to change the IR, since backend will emit sqrt
37  // instruction if the call has already been marked read-only.
38  if (Call->onlyReadsMemory())
39  return false;
40 
41  if (!DebugCounter::shouldExecute(PILCounter))
42  return false;
43 
44  // Do the following transformation:
45  //
46  // (before)
47  // dst = sqrt(src)
48  //
49  // (after)
50  // v0 = sqrt_noreadmem(src) # native sqrt instruction.
51  // [if (v0 is a NaN) || if (src < 0)]
52  // v1 = sqrt(src) # library call.
53  // dst = phi(v0, v1)
54  //
55 
56  Type *Ty = Call->getType();
57  IRBuilder<> Builder(Call->getNextNode());
58 
59  // Split CurrBB right after the call, create a 'then' block (that branches
60  // back to split-off tail of CurrBB) into which we'll insert a libcall.
// NOTE(review): the statement opening this call (presumably
// `Instruction *LibCallTerm = SplitBlockAndInsertIfThen(`) is missing from the
// listing; only its argument list survives. The constant-true condition is a
// placeholder that is replaced by FCmp further down.
62  Builder.getTrue(), Call->getNextNode(), /*Unreachable=*/false,
63  /*BranchWeights*/ nullptr, DTU);
64 
65  auto *CurrBBTerm = cast<BranchInst>(CurrBB.getTerminator());
66  // We want an 'else' block though, not a 'then' block.
// CurrBBTerm is already a BranchInst* from the cast above, so the cast on the
// next line does not change its type.
67  cast<BranchInst>(CurrBBTerm)->swapSuccessors();
68 
69  // Create phi that will merge results of either sqrt and replace all uses.
70  BasicBlock *JoinBB = LibCallTerm->getSuccessor(0);
71  JoinBB->setName(CurrBB.getName() + ".split");
72  Builder.SetInsertPoint(JoinBB, JoinBB->begin());
73  PHINode *Phi = Builder.CreatePHI(Ty, 2);
74  Call->replaceAllUsesWith(Phi);
75 
76  // Finally, insert the libcall into 'else' block.
77  BasicBlock *LibCallBB = LibCallTerm->getParent();
78  LibCallBB->setName("call.sqrt");
79  Builder.SetInsertPoint(LibCallTerm);
// The clone is taken before the attributes of Call are changed below, so the
// slow-path call keeps the original call's attributes.
80  Instruction *LibCall = Call->clone();
81  Builder.Insert(LibCall);
82 
83  // Add attribute "readnone" so that backend can use a native sqrt instruction
84  // for this call.
85  Call->removeFnAttr(Attribute::WriteOnly);
86  Call->addFnAttr(Attribute::ReadNone);
87 
88  // Insert a FP compare instruction and use it as the CurrBB branch condition.
89  Builder.SetInsertPoint(CurrBBTerm);
// NOTE(review): the line that declares FCmp and supplies the `?:` condition is
// missing (presumably a TTI->isFCmpOrdCheaperThanFCmpZero(Ty) query). The two
// alternatives are "fast result is not a NaN" (ORD of the call with itself)
// and "argument is >= 0.0".
91  ? Builder.CreateFCmpORD(Call, Call)
92  : Builder.CreateFCmpOGE(Call->getOperand(0),
93  ConstantFP::get(Ty, 0.0));
94  CurrBBTerm->setCondition(FCmp);
95 
96  // Add phi operands.
97  Phi->addIncoming(Call, &CurrBB);
98  Phi->addIncoming(LibCall, LibCallBB);
99 
// Point the caller's block iterator at the join block, which now holds the
// instructions that followed the call.
100  BB = JoinBB->getIterator();
101  return true;
102 }
103 
// Driver: scans every basic block of F for calls to the sqrt/sqrtf library
// functions and hands them to optimizeSQRT when the target reports a fast
// square root for the call's type. Returns true if at least one call was
// rewritten.
//
// NOTE(review): the first line of the signature is missing from this listing.
// The cross-reference index on this page gives it as
//   static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
//                                          const TargetTransformInfo *TTI,
//                                          DominatorTree *DT)
// The declaration of DTU and the statement guarded by `if (DT)` are also
// missing (numbering jumps 106->108->110); presumably DTU is an
// Optional<DomTreeUpdater> populated only when DT is non-null. Confirm against
// the upstream file.
105  const TargetTransformInfo *TTI,
106  DominatorTree *DT) {
108  if (DT)
110 
111  bool Changed = false;
112 
113  Function::iterator CurrBB;
114  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
// Advance BB before processing the block; optimizeSQRT takes BB as well and
// reassigns it to the join block when it splits CurrBB.
115  CurrBB = BB++;
116 
117  for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
118  II != IE; ++II) {
119  CallInst *Call = dyn_cast<CallInst>(&*II);
120  Function *CalledFunc;
121 
// Only call instructions for which getCalledFunction() is non-null are
// considered.
122  if (!Call || !(CalledFunc = Call->getCalledFunction()))
123  continue;
124 
125  if (Call->isNoBuiltin() || Call->isStrictFP())
126  continue;
127 
128  if (Call->isMustTailCall())
129  continue;
130 
131  // Skip if function either has local linkage or is not a known library
132  // function.
133  LibFunc LF;
134  if (CalledFunc->hasLocalLinkage() ||
135  !TLI->getLibFunc(*CalledFunc, LF) || !TLI->has(LF))
136  continue;
137 
138  switch (LF) {
139  case LibFunc_sqrtf:
140  case LibFunc_sqrt:
// On success `break` leaves only the switch and falls through to the
// bookkeeping below; otherwise `continue` moves on to the next instruction.
141  if (TTI->haveFastSqrt(Call->getType()) &&
142  optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI,
143  DTU ? DTU.getPointer() : nullptr))
144  break;
145  continue;
146  default:
147  continue;
148  }
149 
// CurrBB was split at the call, so stop iterating over it; the outer loop
// resumes from BB, which optimizeSQRT set to the join block.
150  Changed = true;
151  break;
152  }
153  }
154 
155  return Changed;
156 }
157 
// Body of the new-pass-manager entry point. Fetches TargetLibraryInfo and
// TargetTransformInfo, uses the dominator tree only if a cached result already
// exists (DT may be null), and reports every analysis preserved when the
// function was left unchanged.
//
// NOTE(review): the function header and the lines that declare and populate PA
// are missing from this listing (numbering starts at 160 and jumps 164->167).
// The index on this page names the function as
// `PreservedAnalyses PartiallyInlineLibCallsPass::run(Function &F,
// FunctionAnalysisManager &AM)`; what PA preserves cannot be seen here.
// Confirm against the upstream file.
160  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
161  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
162  auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
163  if (!runPartiallyInlineLibCalls(F, &TLI, &TTI, DT))
164  return PreservedAnalyses::all();
167  return PA;
168 }
169 
170 namespace {
// Legacy pass manager wrapper around runPartiallyInlineLibCalls.
171 class PartiallyInlineLibCallsLegacyPass : public FunctionPass {
172 public:
173  static char ID;
174 
// NOTE(review): the constructor body is missing from this listing (numbering
// jumps 175->178); presumably it registers the pass with the PassRegistry.
// Confirm against the upstream file.
175  PartiallyInlineLibCallsLegacyPass() : FunctionPass(ID) {
178  }
179 
// NOTE(review): the body of getAnalysisUsage is missing from this listing
// (numbering jumps 180->185). runOnFunction below calls getAnalysis<> for
// TargetLibraryInfoWrapperPass and TargetTransformInfoWrapperPass, so those
// are presumably declared as required here. Confirm against the upstream file.
180  void getAnalysisUsage(AnalysisUsage &AU) const override {
185  }
186 
// Runs the transformation on F unless skipFunction(F) says to skip it. The
// dominator tree is optional: it is passed along only when a
// DominatorTreeWrapperPass is already available, otherwise DT stays null.
// Returns true if F was modified.
187  bool runOnFunction(Function &F) override {
188  if (skipFunction(F))
189  return false;
190 
191  TargetLibraryInfo *TLI =
192  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
193  const TargetTransformInfo *TTI =
194  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
195  DominatorTree *DT = nullptr;
196  if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
197  DT = &DTWP->getDomTree();
198  return runPartiallyInlineLibCalls(F, TLI, TTI, DT);
199  }
200 };
201 }
202 
// Registers the legacy pass under the command-line name
// "partially-inline-libcalls".
//
// NOTE(review): this listing has lost lines here (numbering jumps 202->204,
// 207->211 and 212->214): whatever preceded the macro, the lines between the
// BEGIN and END macros (presumably INITIALIZE_PASS_DEPENDENCY entries), and
// the closing arguments of INITIALIZE_PASS_END. Confirm against the upstream
// file before editing.
204 INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass,
205  "partially-inline-libcalls",
206  "Partially inline calls to library functions", false,
207  false)
211 INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass,
212  "partially-inline-libcalls",
214 
// Factory body: returns a newly allocated legacy pass instance.
// NOTE(review): the function header is missing from this listing; the index on
// this page gives it as `FunctionPass *createPartiallyInlineLibCallsPass()`.
216  return new PartiallyInlineLibCallsLegacyPass();
217 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2479
libcalls
partially inline libcalls
Definition: PartiallyInlineLibCalls.cpp:212
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
Scalar.h
llvm::Function
Definition: Function.h:60
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::IRBuilder<>
DomTreeUpdater.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
to
Should compile to
Definition: README.txt:449
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::Optional
Definition: APInt.h:33
PartiallyInlineLibCalls.h
runPartiallyInlineLibCalls
static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, DominatorTree *DT)
Definition: PartiallyInlineLibCalls.cpp:104
library
Itanium Name Demangler i e convert the string _Z1fv into and both[sub] projects need to demangle but neither can depend on each other *libcxxabi needs the demangler to implement which is part of the itanium ABI spec *LLVM needs a copy for a bunch of and cannot rely on the system s __cxa_demangle because it a might not be and b may not be up to date on the latest language features The copy of the demangler in LLVM has some extra stuff that aren t needed in which depend on the shared generic components Despite these we want to keep the core generic demangling library identical between both copies to simplify development and testing If you re working on the generic library
Definition: README.txt:36
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::DomTreeUpdater::UpdateStrategy::Lazy
@ Lazy
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::Optional::getPointer
constexpr const T * getPointer() const
Definition: Optional.h:303
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
Definition: PassRegistry.cpp:31
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:35
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:297
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:141
functions
partially inline Partially calls to library functions
Definition: PartiallyInlineLibCalls.cpp:213
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:294
llvm::Instruction
Definition: Instruction.h:42
llvm::DebugCounter::shouldExecute
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:74
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:372
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:789
llvm::TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a comparison against a constant FP zero value.
Definition: TargetTransformInfo.cpp:565
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass, "partially-inline-libcalls", "Partially inline calls to library functions", false, false) INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
DEBUG_COUNTER
DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform", "Controls transformations in partially-inline-libcalls")
llvm::TargetTransformInfo::haveFastSqrt
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
Definition: TargetTransformInfo.cpp:561
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
optimizeSQRT
static bool optimizeSQRT(CallInst *Call, Function *CalledFunc, BasicBlock &CurrBB, Function::iterator &BB, const TargetTransformInfo *TTI, DomTreeUpdater *DTU)
Definition: PartiallyInlineLibCalls.cpp:33
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:468
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2535
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2801
llvm::Optional::emplace
void emplace(ArgTypes &&... Args)
Create a new object by constructing it in place with the given arguments.
Definition: Optional.h:287
llvm::createPartiallyInlineLibCallsPass
FunctionPass * createPartiallyInlineLibCallsPass()
Definition: PartiallyInlineLibCalls.cpp:215
inline
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions inline
Definition: README-SSE.txt:72
IRBuilder.h
llvm::GlobalValue::hasLocalLinkage
bool hasLocalLinkage() const
Definition: GlobalValue.h:493
llvm::TargetLibraryInfo::has
bool has(LibFunc F) const
Tests whether a library function is available.
Definition: TargetLibraryInfo.h:328
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in the specified type.
Definition: Constants.cpp:975
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:799
DebugCounter.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::initializePartiallyInlineLibCallsLegacyPassPass
void initializePartiallyInlineLibCallsLegacyPassPass(PassRegistry &)
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2651
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition: BasicBlock.h:119
calls
amdgpu Simplify well known AMD library calls
Definition: AMDGPULibCalls.cpp:179
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
Definition: Pass.cpp:97
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1461
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions in the BB are moved to a new block.
Definition: BasicBlockUtils.cpp:1446
llvm::PartiallyInlineLibCallsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: PartiallyInlineLibCalls.cpp:159
BasicBlockUtils.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:443
llvm::Function::iterator
BasicBlockListType::iterator iterator
Definition: Function.h:66
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38