LLVM  16.0.0git
FunctionSpecialization.cpp
Go to the documentation of this file.
1 //===- FunctionSpecialization.cpp - Function Specialization ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This specialises functions with constant parameters. Constant parameters
10 // like function pointers and constant globals are propagated to the callee by
11 // specializing the function. The main benefit of this pass at the moment is
12 // that indirect calls are transformed into direct calls, which provides inline
13 // opportunities that the inliner would not have been able to achieve. That's
14 // why function specialisation is run before the inliner in the optimisation
15 // pipeline; that is by design. Otherwise, we would only benefit from constant
16 // passing, which is a valid use-case too, but hasn't been explored much in
17 // terms of performance uplifts, cost-model and compile-time impact.
18 //
19 // Current limitations:
20 // - It does not yet handle integer ranges. We do support "literal constants",
21 // but that's off by default under an option.
22 // - The cost-model could be further looked into (it mainly focuses on inlining
23 // benefits).
24 //
25 // Ideas:
26 // - With a function specialization attribute for arguments, we could have
27 // a direct way to steer function specialization, avoiding the cost-model,
28 // and thus control compile-times / code-size.
29 //
30 // Todos:
31 // - Specializing recursive functions relies on running the transformation a
32 // number of times, which is controlled by option
33 // `func-specialization-max-iters`. Thus, increasing this value and the
34 // number of iterations, will linearly increase the number of times recursive
35 // functions get specialized, see also the discussion in
36 // https://reviews.llvm.org/D106426 for details. Perhaps there is a
37 // compile-time friendlier way to control/limit the number of specialisations
38 // for recursive functions.
39 // - Don't transform the function if function specialization does not trigger;
40 // the SCCPSolver may make IR changes.
41 //
42 // References:
43 // - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable
44 // it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q
45 //
46 //===----------------------------------------------------------------------===//
47 
48 #include "llvm/ADT/Statistic.h"
51 #include "llvm/Analysis/LoopInfo.h"
55 #include "llvm/IR/IntrinsicInst.h"
60 #include <cmath>
61 
62 using namespace llvm;
63 
64 #define DEBUG_TYPE "function-specialization"
65 
66 STATISTIC(NumFuncSpecialized, "Number of functions specialized");
67 
69  "force-function-specialization", cl::init(false), cl::Hidden,
70  cl::desc("Force function specialization for every call site with a "
71  "constant argument"));
72 
74  "func-specialization-max-iters", cl::Hidden,
75  cl::desc("The maximum number of iterations function specialization is run"),
76  cl::init(1));
77 
79  "func-specialization-max-clones", cl::Hidden,
80  cl::desc("The maximum number of clones allowed for a single function "
81  "specialization"),
82  cl::init(3));
83 
85  "func-specialization-size-threshold", cl::Hidden,
86  cl::desc("Don't specialize functions that have less than this theshold "
87  "number of instructions"),
88  cl::init(100));
89 
90 static cl::opt<unsigned>
91  AvgLoopIterationCount("func-specialization-avg-iters-cost", cl::Hidden,
92  cl::desc("Average loop iteration count cost"),
93  cl::init(10));
94 
96  "func-specialization-on-address", cl::init(false), cl::Hidden,
97  cl::desc("Enable function specialization on the address of global values"));
98 
99 // Disabled by default as it can significantly increase compilation times.
100 // Running nikic's compile time tracker on x86 with instruction count as the
101 // metric shows 3-4% regression for SPASS while being neutral for all other
102 // benchmarks of the llvm test suite.
103 //
104 // https://llvm-compile-time-tracker.com
105 // https://github.com/nikic/llvm-compile-time-tracker
107  "function-specialization-for-literal-constant", cl::init(false), cl::Hidden,
108  cl::desc("Enable specialization of functions that take a literal constant "
109  "as an argument."));
110 
111 namespace {
112 // Bookkeeping struct to pass data from the analysis and profitability phase
113 // to the actual transform helper functions.
114 struct SpecializationInfo {
115  SmallVector<ArgInfo, 8> Args; // Stores the {formal,actual} argument pairs.
116  InstructionCost Gain; // Profitability: Gain = Bonus - Cost.
117 };
118 } // Anonymous namespace
119 
121 using CallArgBinding = std::pair<CallBase *, Constant *>;
122 using CallSpecBinding = std::pair<CallBase *, SpecializationInfo>;
123 // We are using MapVector because it guarantees deterministic iteration
124 // order across executions.
126 
127 // Helper to check if \p LV is either a constant or a constant
128 // range with a single element. This should cover exactly the same cases as the
129 // old ValueLatticeElement::isConstant() and is intended to be used in the
130 // transition to ValueLatticeElement.
131 static bool isConstant(const ValueLatticeElement &LV) {
132  return LV.isConstant() ||
134 }
135 
136 // Helper to check if \p LV is either overdefined or a constant int.
137 static bool isOverdefined(const ValueLatticeElement &LV) {
138  return !LV.isUnknownOrUndef() && !isConstant(LV);
139 }
140 
142  Value *StoreValue = nullptr;
143  for (auto *User : Alloca->users()) {
144  // We can't use llvm::isAllocaPromotable() as that would fail because of
145  // the usage in the CallInst, which is what we check here.
146  if (User == Call)
147  continue;
148  if (auto *Bitcast = dyn_cast<BitCastInst>(User)) {
149  if (!Bitcast->hasOneUse() || *Bitcast->user_begin() != Call)
150  return nullptr;
151  continue;
152  }
153 
154  if (auto *Store = dyn_cast<StoreInst>(User)) {
155  // This is a duplicate store, bail out.
156  if (StoreValue || Store->isVolatile())
157  return nullptr;
158  StoreValue = Store->getValueOperand();
159  continue;
160  }
161  // Bail if there is any other unknown usage.
162  return nullptr;
163  }
164  return dyn_cast_or_null<Constant>(StoreValue);
165 }
166 
167 // A constant stack value is an AllocaInst that has a single constant
168 // value stored to it. Return this constant if such an alloca stack value
169 // is a function argument.
171  SCCPSolver &Solver) {
172  if (!Val)
173  return nullptr;
174  Val = Val->stripPointerCasts();
175  if (auto *ConstVal = dyn_cast<ConstantInt>(Val))
176  return ConstVal;
177  auto *Alloca = dyn_cast<AllocaInst>(Val);
178  if (!Alloca || !Alloca->getAllocatedType()->isIntegerTy())
179  return nullptr;
180  return getPromotableAlloca(Alloca, Call);
181 }
182 
183 // To support specializing recursive functions, it is important to propagate
184 // constant arguments because after a first iteration of specialisation, a
185 // reduced example may look like this:
186 //
187 // define internal void @RecursiveFn(i32* arg1) {
188 // %temp = alloca i32, align 4
189 // store i32 2, i32* %temp, align 4
190 // call void @RecursiveFn.1(i32* nonnull %temp)
191 // ret void
192 // }
193 //
194 // Before a next iteration, we need to propagate the constant like so
195 // which allows further specialization in next iterations.
196 //
197 // @funcspec.arg = internal constant i32 2
198 //
199 // define internal void @someFunc(i32* arg1) {
200 // call void @otherFunc(i32* nonnull @funcspec.arg)
201 // ret void
202 // }
203 //
204 static void constantArgPropagation(FuncList &WorkList, Module &M,
205  SCCPSolver &Solver) {
206  // Iterate over the argument-tracked functions to see if there
207  // are any new constant values for the call instruction via
208  // stack variables.
209  for (auto *F : WorkList) {
210 
211  for (auto *User : F->users()) {
212 
213  auto *Call = dyn_cast<CallInst>(User);
214  if (!Call)
215  continue;
216 
217  bool Changed = false;
218  for (const Use &U : Call->args()) {
219  unsigned Idx = Call->getArgOperandNo(&U);
220  Value *ArgOp = Call->getArgOperand(Idx);
221  Type *ArgOpType = ArgOp->getType();
222 
223  if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy())
224  continue;
225 
226  auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver);
227  if (!ConstVal)
228  continue;
229 
230  Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
232  "funcspec.arg");
233  if (ArgOpType != ConstVal->getType())
234  GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOpType);
235 
236  Call->setArgOperand(Idx, GV);
237  Changed = true;
238  }
239 
240  // Add the changed CallInst to Solver Worklist
241  if (Changed)
242  Solver.visitCall(*Call);
243  }
244  }
245 }
246 
247 // ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics
248 // interfere with the constantArgPropagation optimization.
249 static void removeSSACopy(Function &F) {
250  for (BasicBlock &BB : F) {
251  for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
252  auto *II = dyn_cast<IntrinsicInst>(&Inst);
253  if (!II)
254  continue;
255  if (II->getIntrinsicID() != Intrinsic::ssa_copy)
256  continue;
257  Inst.replaceAllUsesWith(II->getOperand(0));
258  Inst.eraseFromParent();
259  }
260  }
261 }
262 
263 static void removeSSACopy(Module &M) {
264  for (Function &F : M)
265  removeSSACopy(F);
266 }
267 
268 namespace {
269 class FunctionSpecializer {
270 
271  /// The IPSCCP Solver.
272  SCCPSolver &Solver;
273 
274  /// Analyses used to help determine if a function should be specialized.
278 
279  SmallPtrSet<Function *, 4> SpecializedFuncs;
280  SmallPtrSet<Function *, 4> FullySpecialized;
281  SmallVector<Instruction *> ReplacedWithConstant;
282  DenseMap<Function *, CodeMetrics> FunctionMetrics;
283 
284 public:
285  FunctionSpecializer(SCCPSolver &Solver,
289  : Solver(Solver), GetAC(GetAC), GetTTI(GetTTI), GetTLI(GetTLI) {}
290 
291  ~FunctionSpecializer() {
292  // Eliminate dead code.
293  removeDeadInstructions();
294  removeDeadFunctions();
295  }
296 
297  /// Attempt to specialize functions in the module to enable constant
298  /// propagation across function boundaries.
299  ///
300  /// \returns true if at least one function is specialized.
301  bool specializeFunctions(FuncList &Candidates, FuncList &WorkList) {
302  bool Changed = false;
303  for (auto *F : Candidates) {
304  if (!isCandidateFunction(F))
305  continue;
306 
307  auto Cost = getSpecializationCost(F);
308  if (!Cost.isValid()) {
309  LLVM_DEBUG(
310  dbgs() << "FnSpecialization: Invalid specialization cost.\n");
311  continue;
312  }
313 
314  LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
315  << F->getName() << " is " << Cost << "\n");
316 
317  SmallVector<CallSpecBinding, 8> Specializations;
318  if (!findSpecializations(F, Cost, Specializations)) {
319  LLVM_DEBUG(
320  dbgs() << "FnSpecialization: No possible specializations found\n");
321  continue;
322  }
323 
324  Changed = true;
325  for (auto &Entry : Specializations)
326  specializeFunction(F, Entry.second, WorkList);
327  }
328 
329  updateSpecializedFuncs(Candidates, WorkList);
330  NumFuncSpecialized += NbFunctionsSpecialized;
331  return Changed;
332  }
333 
334  void removeDeadInstructions() {
335  for (auto *I : ReplacedWithConstant) {
336  LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead instruction " << *I
337  << "\n");
338  I->eraseFromParent();
339  }
340  ReplacedWithConstant.clear();
341  }
342 
343  void removeDeadFunctions() {
344  for (auto *F : FullySpecialized) {
345  LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function "
346  << F->getName() << "\n");
347  F->eraseFromParent();
348  }
349  FullySpecialized.clear();
350  }
351 
353  if (!V->getType()->isSingleValueType() || isa<CallBase>(V) ||
354  V->user_empty())
355  return false;
356 
357  const ValueLatticeElement &IV = Solver.getLatticeValueFor(V);
358  if (isOverdefined(IV))
359  return false;
360  auto *Const =
361  isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType());
362 
363  LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing " << *V
364  << "\nFnSpecialization: with " << *Const << "\n");
365 
366  // Record uses of V to avoid visiting irrelevant uses of const later.
368  for (auto *U : V->users())
369  if (auto *I = dyn_cast<Instruction>(U))
370  if (Solver.isBlockExecutable(I->getParent()))
371  UseInsts.push_back(I);
372 
373  V->replaceAllUsesWith(Const);
374 
375  for (auto *I : UseInsts)
376  Solver.visit(I);
377 
378  // Remove the instruction from Block and Solver.
379  if (auto *I = dyn_cast<Instruction>(V)) {
380  if (I->isSafeToRemove()) {
381  ReplacedWithConstant.push_back(I);
382  Solver.removeLatticeValueFor(I);
383  }
384  }
385  return true;
386  }
387 
388 private:
389  // The number of functions specialised, used for collecting statistics and
390  // also in the cost model.
391  unsigned NbFunctionsSpecialized = 0;
392 
393  // Compute the code metrics for function \p F.
394  CodeMetrics &analyzeFunction(Function *F) {
395  auto I = FunctionMetrics.insert({F, CodeMetrics()});
396  CodeMetrics &Metrics = I.first->second;
397  if (I.second) {
398  // The code metrics were not cached.
400  CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues);
401  for (BasicBlock &BB : *F)
402  Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues);
403 
404  LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function "
405  << F->getName() << " is " << Metrics.NumInsts
406  << " instructions\n");
407  }
408  return Metrics;
409  }
410 
411  /// Clone the function \p F and remove the ssa_copy intrinsics added by
412  /// the SCCPSolver in the cloned version.
413  Function *cloneCandidateFunction(Function *F, ValueToValueMapTy &Mappings) {
414  Function *Clone = CloneFunction(F, Mappings);
415  removeSSACopy(*Clone);
416  return Clone;
417  }
418 
419  /// This function decides whether it's worthwhile to specialize function
420  /// \p F based on the known constant values its arguments can take on. It
421  /// only discovers potential specialization opportunities without actually
422  /// applying them.
423  ///
424  /// \returns true if any specializations have been found.
425  bool findSpecializations(Function *F, InstructionCost Cost,
427  // Get a list of interesting arguments.
429  for (Argument &Arg : F->args())
430  if (isArgumentInteresting(&Arg))
431  Args.push_back(&Arg);
432 
433  if (!Args.size())
434  return false;
435 
436  // Find all the call sites for the function.
437  SpecializationMap Specializations;
438  for (User *U : F->users()) {
439  if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
440  continue;
441  auto &CS = *cast<CallBase>(U);
442  // If the call site has attribute minsize set, that callsite won't be
443  // specialized.
444  if (CS.hasFnAttr(Attribute::MinSize))
445  continue;
446 
447  // If the parent of the call site will never be executed, we don't need
448  // to worry about the passed value.
449  if (!Solver.isBlockExecutable(CS.getParent()))
450  continue;
451 
452  // Examine arguments and create specialization candidates from call sites
453  // with constant arguments.
454  bool Added = false;
455  for (Argument *A : Args) {
456  Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo()));
457  if (!C)
458  continue;
459 
460  if (!Added) {
461  Specializations[&CS] = {{}, 0 - Cost};
462  Added = true;
463  }
464 
465  SpecializationInfo &S = Specializations.back().second;
466  S.Gain += getSpecializationBonus(A, C, Solver.getLoopInfo(*F));
467  S.Args.push_back({A, C});
468  }
469  Added = false;
470  }
471 
472  // Remove unprofitable specializations.
474  Specializations.remove_if(
475  [](const auto &Entry) { return Entry.second.Gain <= 0; });
476 
477  // Clear the MapVector and return the underlying vector.
478  WorkList = Specializations.takeVector();
479 
480  // Sort the candidates in descending order.
481  llvm::stable_sort(WorkList, [](const auto &L, const auto &R) {
482  return L.second.Gain > R.second.Gain;
483  });
484 
485  // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary.
486  if (WorkList.size() > MaxClonesThreshold) {
487  LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
488  << "the maximum number of clones threshold.\n"
489  << "FnSpecialization: Truncating worklist to "
490  << MaxClonesThreshold << " candidates.\n");
491  WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end());
492  }
493 
494  LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function "
495  << F->getName() << "\n";
496  for (const auto &Entry
497  : WorkList) {
498  dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain
499  << "\n";
500  for (const ArgInfo &Arg : Entry.second.Args)
501  dbgs() << "FnSpecialization: FormalArg = "
502  << Arg.Formal->getNameOrAsOperand()
503  << ", ActualArg = "
504  << Arg.Actual->getNameOrAsOperand() << "\n";
505  });
506 
507  return !WorkList.empty();
508  }
509 
510  bool isCandidateFunction(Function *F) {
511  // Do not specialize the cloned function again.
512  if (SpecializedFuncs.contains(F))
513  return false;
514 
515  // If we're optimizing the function for size, we shouldn't specialize it.
516  if (F->hasOptSize() ||
517  shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass))
518  return false;
519 
520  // Exit if the function is not executable. There's no point in specializing
521  // a dead function.
522  if (!Solver.isBlockExecutable(&F->getEntryBlock()))
523  return false;
524 
525  // It wastes time to specialize a function which would get inlined finally.
526  if (F->hasFnAttribute(Attribute::AlwaysInline))
527  return false;
528 
529  LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName()
530  << "\n");
531  return true;
532  }
533 
534  void specializeFunction(Function *F, SpecializationInfo &S,
535  FuncList &WorkList) {
537  Function *Clone = cloneCandidateFunction(F, Mappings);
538 
539  // Rewrite calls to the function so that they call the clone instead.
540  rewriteCallSites(Clone, S.Args, Mappings);
541 
542  // Initialize the lattice state of the arguments of the function clone,
543  // marking the argument on which we specialized the function constant
544  // with the given value.
545  Solver.markArgInFuncSpecialization(Clone, S.Args);
546 
547  // Mark all the specialized functions
548  WorkList.push_back(Clone);
549  NbFunctionsSpecialized++;
550 
551  // If the function has been completely specialized, the original function
552  // is no longer needed. Mark it unreachable.
553  if (F->getNumUses() == 0 || all_of(F->users(), [F](User *U) {
554  if (auto *CS = dyn_cast<CallBase>(U))
555  return CS->getFunction() == F;
556  return false;
557  })) {
558  Solver.markFunctionUnreachable(F);
559  FullySpecialized.insert(F);
560  }
561  }
562 
563  /// Compute and return the cost of specializing function \p F.
564  InstructionCost getSpecializationCost(Function *F) {
565  CodeMetrics &Metrics = analyzeFunction(F);
566  // If the code metrics reveal that we shouldn't duplicate the function, we
567  // shouldn't specialize it. Set the specialization cost to Invalid.
568  // Or if the number of instructions implies that this function would easily
569  // get inlined, we shouldn't specialize it either.
570  if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
572  !F->hasFnAttribute(Attribute::NoInline) &&
573  Metrics.NumInsts < SmallFunctionThreshold))
575 
576  // Otherwise, set the specialization cost to be the cost of all the
577  // instructions in the function and penalty for specializing more functions.
578  unsigned Penalty = NbFunctionsSpecialized + 1;
579  return Metrics.NumInsts * InlineConstants::getInstrCost() * Penalty;
580  }
581 
583  const LoopInfo &LI) {
584  auto *I = dyn_cast_or_null<Instruction>(U);
585  // If not an instruction we do not know how to evaluate.
586  // Keep minimum possible cost for now so that it doesn't affect
587  // specialization.
588  if (!I)
590 
593 
594  // Increase the cost if it is inside the loop.
595  unsigned LoopDepth = LI.getLoopDepth(I->getParent());
596  Cost *= std::pow((double)AvgLoopIterationCount, LoopDepth);
597 
598  // Traverse recursively if there are more uses.
599  // TODO: Any other instructions to be added here?
600  if (I->mayReadFromMemory() || I->isCast())
601  for (auto *User : I->users())
602  Cost += getUserBonus(User, TTI, LI);
603 
604  return Cost;
605  }
606 
607  /// Compute a bonus for replacing argument \p A with constant \p C.
608  InstructionCost getSpecializationBonus(Argument *A, Constant *C,
609  const LoopInfo &LI) {
610  Function *F = A->getParent();
611  auto &TTI = (GetTTI)(*F);
612  LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
613  << C->getNameOrAsOperand() << "\n");
614 
615  InstructionCost TotalCost = 0;
616  for (auto *U : A->users()) {
617  TotalCost += getUserBonus(U, TTI, LI);
618  LLVM_DEBUG(dbgs() << "FnSpecialization: User cost ";
619  TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n");
620  }
621 
622  // The below heuristic is only concerned with exposing inlining
623  // opportunities via indirect call promotion. If the argument is not a
624  // (potentially casted) function pointer, give up.
625  Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
626  if (!CalledFunction)
627  return TotalCost;
628 
629  // Get TTI for the called function (used for the inline cost).
630  auto &CalleeTTI = (GetTTI)(*CalledFunction);
631 
632  // Look at all the call sites whose called value is the argument.
633  // Specializing the function on the argument would allow these indirect
634  // calls to be promoted to direct calls. If the indirect call promotion
635  // would likely enable the called function to be inlined, specializing is a
636  // good idea.
637  int Bonus = 0;
638  for (User *U : A->users()) {
639  if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
640  continue;
641  auto *CS = cast<CallBase>(U);
642  if (CS->getCalledOperand() != A)
643  continue;
644 
645  // Get the cost of inlining the called function at this call site. Note
646  // that this is only an estimate. The called function may eventually
647  // change in a way that leads to it not being inlined here, even though
648  // inlining looks profitable now. For example, one of its called
649  // functions may be inlined into it, making the called function too large
650  // to be inlined into this call site.
651  //
652  // We apply a boost for performing indirect call promotion by increasing
653  // the default threshold by the threshold for indirect calls.
654  auto Params = getInlineParams();
655  Params.DefaultThreshold += InlineConstants::IndirectCallThreshold;
656  InlineCost IC =
657  getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI);
658 
659  // We clamp the bonus for this call to be between zero and the default
660  // threshold.
661  if (IC.isAlways())
662  Bonus += Params.DefaultThreshold;
663  else if (IC.isVariable() && IC.getCostDelta() > 0)
664  Bonus += IC.getCostDelta();
665 
666  LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus
667  << " for user " << *U << "\n");
668  }
669 
670  return TotalCost + Bonus;
671  }
672 
673  /// Determine if it is possible to specialise the function for constant values
674  /// of the formal parameter \p A.
675  bool isArgumentInteresting(Argument *A) {
676  // No point in specialization if the argument is unused.
677  if (A->user_empty())
678  return false;
679 
680  // For now, don't attempt to specialize functions based on the values of
681  // composite types.
682  Type *ArgTy = A->getType();
683  if (!ArgTy->isSingleValueType())
684  return false;
685 
686  // Specialization of integer and floating point types needs to be explicitly
687  // enabled.
689  (ArgTy->isIntegerTy() || ArgTy->isFloatingPointTy()))
690  return false;
691 
692  // SCCP solver does not record an argument that will be constructed on
693  // stack.
694  if (A->hasByValAttr() && !A->getParent()->onlyReadsMemory())
695  return false;
696 
697  // Check the lattice value and decide if we should attempt to specialize,
698  // based on this argument. No point in specialization, if the lattice value
699  // is already a constant.
700  const ValueLatticeElement &LV = Solver.getLatticeValueFor(A);
701  if (LV.isUnknownOrUndef() || LV.isConstant() ||
703  LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, argument "
704  << A->getNameOrAsOperand() << " is already constant\n");
705  return false;
706  }
707 
708  return true;
709  }
710 
711  /// Check if the value \p V (an actual argument) is a constant or can only
712  /// have a constant value. Return that constant.
713  Constant *getCandidateConstant(Value *V) {
714  if (isa<PoisonValue>(V))
715  return nullptr;
716 
717  // TrackValueOfGlobalVariable only tracks scalar global variables.
718  if (auto *GV = dyn_cast<GlobalVariable>(V)) {
719  // Check if we want to specialize on the address of non-constant
720  // global values.
721  if (!GV->isConstant() && !SpecializeOnAddresses)
722  return nullptr;
723 
724  if (!GV->getValueType()->isSingleValueType())
725  return nullptr;
726  }
727 
728  // Select for possible specialisation values that are constants or
729  // are deduced to be constants or constant ranges with a single element.
730  Constant *C = dyn_cast<Constant>(V);
731  if (!C) {
732  const ValueLatticeElement &LV = Solver.getLatticeValueFor(V);
733  if (LV.isConstant())
734  C = LV.getConstant();
735  else if (LV.isConstantRange() &&
737  assert(V->getType()->isIntegerTy() && "Non-integral constant range");
740  } else
741  return nullptr;
742  }
743 
744  LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
745  << V->getNameOrAsOperand() << "\n");
746 
747  return C;
748  }
749 
750  /// Rewrite calls to function \p F to call function \p Clone instead.
751  ///
752  /// This function modifies calls to function \p F as long as the actual
753  /// arguments match those in \p Args. Note that for recursive calls we
754  /// need to compare against the cloned formal arguments.
755  ///
756  /// Callsites that have been marked with the MinSize function attribute won't
757  /// be specialized and rewritten.
758  void rewriteCallSites(Function *Clone, const SmallVectorImpl<ArgInfo> &Args,
760  assert(!Args.empty() && "Specialization without arguments");
761  Function *F = Args[0].Formal->getParent();
762 
763  SmallVector<CallBase *, 8> CallSitesToRewrite;
764  for (auto *U : F->users()) {
765  if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
766  continue;
767  auto &CS = *cast<CallBase>(U);
768  if (!CS.getCalledFunction() || CS.getCalledFunction() != F)
769  continue;
770  CallSitesToRewrite.push_back(&CS);
771  }
772 
773  LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing call sites of "
774  << F->getName() << " with " << Clone->getName() << "\n");
775 
776  for (auto *CS : CallSitesToRewrite) {
777  LLVM_DEBUG(dbgs() << "FnSpecialization: "
778  << CS->getFunction()->getName() << " ->" << *CS
779  << "\n");
780  if (/* recursive call */
781  (CS->getFunction() == Clone &&
782  all_of(Args,
783  [CS, &Mappings](const ArgInfo &Arg) {
784  unsigned ArgNo = Arg.Formal->getArgNo();
785  return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal];
786  })) ||
787  /* normal call */
788  all_of(Args, [CS](const ArgInfo &Arg) {
789  unsigned ArgNo = Arg.Formal->getArgNo();
790  return CS->getArgOperand(ArgNo) == Arg.Actual;
791  })) {
792  CS->setCalledFunction(Clone);
793  Solver.markOverdefined(CS);
794  }
795  }
796  }
797 
798  void updateSpecializedFuncs(FuncList &Candidates, FuncList &WorkList) {
799  for (auto *F : WorkList) {
800  SpecializedFuncs.insert(F);
801 
802  // Initialize the state of the newly created functions, marking them
803  // argument-tracked and executable.
804  if (F->hasExactDefinition() && !F->hasFnAttribute(Attribute::Naked))
805  Solver.addTrackedFunction(F);
806 
808  Candidates.push_back(F);
809  Solver.markBlockExecutable(&F->front());
810 
811  // Replace the function arguments for the specialized functions.
812  for (Argument &Arg : F->args())
813  if (!Arg.use_empty() && tryToReplaceWithConstant(&Arg))
814  LLVM_DEBUG(dbgs() << "FnSpecialization: Replaced constant argument: "
815  << Arg.getNameOrAsOperand() << "\n");
816  }
817  }
818 };
819 } // namespace
820 
822  Module &M, const DataLayout &DL,
826  function_ref<AnalysisResultsForFn(Function &)> GetAnalysis) {
827  SCCPSolver Solver(DL, GetTLI, M.getContext());
828  FunctionSpecializer FS(Solver, GetAC, GetTTI, GetTLI);
829  bool Changed = false;
830 
831  // Loop over all functions, marking arguments to those with their addresses
832  // taken or that are external as overdefined.
833  for (Function &F : M) {
834  if (F.isDeclaration())
835  continue;
836  if (F.hasFnAttribute(Attribute::NoDuplicate))
837  continue;
838 
839  LLVM_DEBUG(dbgs() << "\nFnSpecialization: Analysing decl: " << F.getName()
840  << "\n");
841  Solver.addAnalysis(F, GetAnalysis(F));
842 
843  // Determine if we can track the function's arguments. If so, add the
844  // function to the solver's set of argument-tracked functions.
846  LLVM_DEBUG(dbgs() << "FnSpecialization: Can track arguments\n");
847  Solver.addArgumentTrackedFunction(&F);
848  continue;
849  } else {
850  LLVM_DEBUG(dbgs() << "FnSpecialization: Can't track arguments!\n"
851  << "FnSpecialization: Doesn't have local linkage, or "
852  << "has its address taken\n");
853  }
854 
855  // Assume the function is called.
856  Solver.markBlockExecutable(&F.front());
857 
858  // Assume nothing about the incoming arguments.
859  for (Argument &AI : F.args())
860  Solver.markOverdefined(&AI);
861  }
862 
863  // Determine if we can track any of the module's global variables. If so, add
864  // the global variables we can track to the solver's set of tracked global
865  // variables.
866  for (GlobalVariable &G : M.globals()) {
867  G.removeDeadConstantUsers();
869  Solver.trackValueOfGlobalVariable(&G);
870  }
871 
872  auto &TrackedFuncs = Solver.getArgumentTrackedFunctions();
873  SmallVector<Function *, 16> FuncDecls(TrackedFuncs.begin(),
874  TrackedFuncs.end());
875 
876  // No tracked functions, so nothing to do: don't run the solver and remove
877  // the ssa_copy intrinsics that may have been introduced.
878  if (TrackedFuncs.empty()) {
879  removeSSACopy(M);
880  return false;
881  }
882 
883  // Solve for constants.
884  auto RunSCCPSolver = [&](auto &WorkList) {
885  bool ResolvedUndefs = true;
886 
887  while (ResolvedUndefs) {
888  // Not running the solver unnecessarily is checked in regression test
889  // nothing-to-do.ll, so if this debug message is changed, this regression
890  // test needs updating too.
891  LLVM_DEBUG(dbgs() << "FnSpecialization: Running solver\n");
892 
893  Solver.solve();
894  LLVM_DEBUG(dbgs() << "FnSpecialization: Resolving undefs\n");
895  ResolvedUndefs = false;
896  for (Function *F : WorkList)
897  if (Solver.resolvedUndefsIn(*F))
898  ResolvedUndefs = true;
899  }
900 
901  for (auto *F : WorkList) {
902  for (BasicBlock &BB : *F) {
903  if (!Solver.isBlockExecutable(&BB))
904  continue;
905  // FIXME: The solver may make changes to the function here, so set
906  // Changed, even if later function specialization does not trigger.
907  for (auto &I : make_early_inc_range(BB))
908  Changed |= FS.tryToReplaceWithConstant(&I);
909  }
910  }
911  };
912 
913 #ifndef NDEBUG
914  LLVM_DEBUG(dbgs() << "FnSpecialization: Worklist fn decls:\n");
915  for (auto *F : FuncDecls)
916  LLVM_DEBUG(dbgs() << "FnSpecialization: *) " << F->getName() << "\n");
917 #endif
918 
919  // Initially resolve the constants in all the argument tracked functions.
920  RunSCCPSolver(FuncDecls);
921 
923  unsigned I = 0;
924  while (FuncSpecializationMaxIters != I++ &&
925  FS.specializeFunctions(FuncDecls, WorkList)) {
926  LLVM_DEBUG(dbgs() << "FnSpecialization: Finished iteration " << I << "\n");
927 
928  // Run the solver for the specialized functions.
929  RunSCCPSolver(WorkList);
930 
931  // Replace some unresolved constant arguments.
932  constantArgPropagation(FuncDecls, M, Solver);
933 
934  WorkList.clear();
935  Changed = true;
936  }
937 
938  LLVM_DEBUG(dbgs() << "FnSpecialization: Number of specializations = "
939  << NumFuncSpecialized << "\n");
940 
941  // Remove any ssa_copy intrinsics that may have been introduced.
942  removeSSACopy(M);
943  return Changed;
944 }
llvm::InstructionCost
Definition: InstructionCost.h:30
llvm::InlineCost::isAlways
bool isAlways() const
Definition: InlineCost.h:137
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
CallArgBinding
std::pair< CallBase *, Constant * > CallArgBinding
Definition: FunctionSpecialization.cpp:121
llvm::ValueLatticeElement::getConstantRange
const ConstantRange & getConstantRange(bool UndefAllowed=true) const
Returns the constant range for this value.
Definition: ValueLattice.h:272
llvm::ValueLatticeElement::isConstant
bool isConstant() const
Definition: ValueLattice.h:243
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
llvm::MapVector::back
std::pair< KeyT, ValueT > & back()
Definition: MapVector.h:86
llvm::MapVector::remove_if
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:224
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:237
llvm::AnalysisResultsForFn
Helper struct for bundling up the analysis results per function for IPSCCP.
Definition: SCCPSolver.h:41
llvm::ValueLatticeElement::getConstant
Constant * getConstant() const
Definition: ValueLattice.h:258
llvm::Function
Definition: Function.h:60
SpecializeOnAddresses
static cl::opt< bool > SpecializeOnAddresses("func-specialization-on-address", cl::init(false), cl::Hidden, cl::desc("Enable function specialization on the address of global values"))
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:31
llvm::LegacyLegalizeActions::Bitcast
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegacyLegalizerInfo.h:54
SizeOpts.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
SCCP.h
isConstant
static bool isConstant(const ValueLatticeElement &LV)
Definition: FunctionSpecialization.cpp:131
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2202
llvm::SCCPSolver::getLoopInfo
const LoopInfo & getLoopInfo(Function &F)
Definition: SCCPSolver.cpp:1529
llvm::SCCPSolver::getConstant
Constant * getConstant(const ValueLatticeElement &LV) const
Helper to return a Constant if LV is either a constant or a constant range with a single element.
Definition: SCCPSolver.cpp:1606
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::X86AS::FS
@ FS
Definition: X86.h:200
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::SCCPSolver::markBlockExecutable
bool markBlockExecutable(BasicBlock *BB)
markBlockExecutable - This method can be used by clients to mark all of the blocks that are known to ...
Definition: SCCPSolver.cpp:1521
llvm::SCCPSolver::visit
void visit(Instruction *I)
Definition: SCCPSolver.cpp:1623
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
llvm::InstructionCost::print
void print(raw_ostream &OS) const
Definition: InstructionCost.cpp:19
llvm::SCCPSolver::addArgumentTrackedFunction
void addArgumentTrackedFunction(Function *F)
Definition: SCCPSolver.cpp:1551
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
AvgLoopIterationCount
static cl::opt< unsigned > AvgLoopIterationCount("func-specialization-avg-iters-cost", cl::Hidden, cl::desc("Average loop iteration count cost"), cl::init(10))
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:265
llvm::SCCPSolver::markFunctionUnreachable
void markFunctionUnreachable(Function *F)
Mark all of the blocks in function F non-executable.
Definition: SCCPSolver.cpp:1619
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Mappings
Inject TLI Mappings
Definition: InjectTLIMappings.cpp:171
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
CodeMetrics.h
llvm::Type::isSingleValueType
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:268
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1709
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
llvm::ValueLatticeElement::isUnknownOrUndef
bool isUnknownOrUndef() const
Definition: ValueLattice.h:242
llvm::getInlineCost
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Definition: InlineCost.cpp:2822
InlinePriorityMode::Cost
@ Cost
llvm::SCCPSolver::markOverdefined
void markOverdefined(Value *V)
markOverdefined - Mark the specified value overdefined.
Definition: SCCPSolver.cpp:1600
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::ConstantRange::isSingleElement
bool isSingleElement() const
Return true if this set contains exactly one member.
Definition: ConstantRange.h:261
llvm::InlineCost
Represents the cost of inlining a function.
Definition: InlineCost.h:90
llvm::InlineConstants::getInstrCost
int getInstrCost()
Definition: InlineCost.cpp:183
llvm::SCCPSolver::addAnalysis
void addAnalysis(Function &F, AnalysisResultsForFn A)
Definition: SCCPSolver.cpp:1517
llvm::Instruction
Definition: Instruction.h:42
SmallFunctionThreshold
static cl::opt< unsigned > SmallFunctionThreshold("func-specialization-size-threshold", cl::Hidden, cl::desc("Don't specialize functions that have less than this theshold " "number of instructions"), cl::init(100))
CallSpecBinding
std::pair< CallBase *, SpecializationInfo > CallSpecBinding
Definition: FunctionSpecialization.cpp:122
llvm::PGSOQueryType::IRPass
@ IRPass
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1713
tryToReplaceWithConstant
static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V)
Definition: SCCP.cpp:106
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3101
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::canTrackGlobalVariableInterprocedurally
bool canTrackGlobalVariableInterprocedurally(GlobalVariable *GV)
Determine if the value maintained in the given global variable can be tracked interprocedurally.
Definition: ValueLatticeUtils.cpp:27
LoopInfo.h
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:210
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
llvm::SmallMapVector
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:233
llvm::cl::opt< bool >
llvm::SCCPSolver::visitCall
void visitCall(CallInst &I)
Definition: SCCPSolver.cpp:1625
FuncSpecializationMaxIters
static cl::opt< unsigned > FuncSpecializationMaxIters("func-specialization-max-iters", cl::Hidden, cl::desc("The maximum number of iterations function specialization is run"), cl::init(1))
llvm::SCCPSolver::getLatticeValueFor
const ValueLatticeElement & getLatticeValueFor(Value *V) const
Definition: SCCPSolver.cpp:1582
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Value::getNameOrAsOperand
std::string getNameOrAsOperand() const
Definition: Value.cpp:443
llvm::InlineConstants::IndirectCallThreshold
const int IndirectCallThreshold
Definition: InlineCost.h:49
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:58
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:716
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::SCCPSolver
SCCPSolver - This interface class is a general purpose solver for Sparse Conditional Constant Propaga...
Definition: SCCPSolver.h:63
llvm::InlineCost::getCostDelta
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:173
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MapVector::takeVector
VectorType takeVector()
Clear the MapVector and return the underlying vector.
Definition: MapVector.h:56
InlineCost.h
MaxClonesThreshold
static cl::opt< unsigned > MaxClonesThreshold("func-specialization-max-clones", cl::Hidden, cl::desc("The maximum number of clones allowed for a single function " "specialization"), cl::init(3))
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::Value::user_empty
bool user_empty() const
Definition: Value.h:385
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::runFunctionSpecialization
bool runFunctionSpecialization(Module &M, const DataLayout &DL, std::function< TargetLibraryInfo &(Function &)> GetTLI, std::function< TargetTransformInfo &(Function &)> GetTTI, std::function< AssumptionCache &(Function &)> GetAC, function_ref< AnalysisResultsForFn(Function &)> GetAnalysis)
Definition: FunctionSpecialization.cpp:821
llvm::ConstantRange::getSingleElement
const APInt * getSingleElement() const
If this set contains a single element, return it, otherwise return null.
Definition: ConstantRange.h:246
llvm::LoopInfoBase::getLoopDepth
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
Definition: LoopInfo.h:999
llvm::SCCPSolver::markArgInFuncSpecialization
void markArgInFuncSpecialization(Function *F, const SmallVectorImpl< ArgInfo > &Args)
Mark the constant arguments of a new function specialization.
Definition: SCCPSolver.cpp:1614
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:222
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
getConstantStackValue
static Constant * getConstantStackValue(CallInst *Call, Value *Val, SCCPSolver &Solver)
Definition: FunctionSpecialization.cpp:170
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::SCCPSolver::removeLatticeValueFor
void removeLatticeValueFor(Value *V)
Definition: SCCPSolver.cpp:1578
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ValueLatticeElement
This class represents lattice values for constants.
Definition: ValueLattice.h:29
constantArgPropagation
static void constantArgPropagation(FuncList &WorkList, Module &M, SCCPSolver &Solver)
Definition: FunctionSpecialization.cpp:204
llvm::CloneFunction
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
Definition: CloneFunction.cpp:297
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
ValueLatticeUtils.h
llvm::ValueMap< const Value *, WeakTrackingVH >
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
llvm::ArgInfo
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:49
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::SCCPSolver::isBlockExecutable
bool isBlockExecutable(BasicBlock *BB) const
Definition: SCCPSolver.cpp:1565
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:685
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1922
SCCPSolver.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:226
llvm::SCCPSolver::resolvedUndefsIn
bool resolvedUndefsIn(Function &F)
resolvedUndefsIn - While solving the dataflow for a function, we assume that branches on undef values...
Definition: SCCPSolver.cpp:1561
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::codeview::ModifierOptions::Const
@ Const
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:74
getPromotableAlloca
static Constant * getPromotableAlloca(AllocaInst *Alloca, CallInst *Call)
Definition: FunctionSpecialization.cpp:141
llvm::Constant::getIntegerValue
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:378
removeSSACopy
static void removeSSACopy(Function &F)
Definition: FunctionSpecialization.cpp:249
llvm::canTrackArgumentsInterprocedurally
bool canTrackArgumentsInterprocedurally(Function *F)
Determine if the values of the given function's arguments can be tracked interprocedurally.
Definition: ValueLatticeUtils.cpp:19
llvm::ValueLatticeElement::isConstantRange
bool isConstantRange(bool UndefAllowed=true) const
Returns true if this value is a constant range.
Definition: ValueLattice.h:252
TargetTransformInfo.h
llvm::logicalview::LVComparePass::Added
@ Added
IV
static const uint32_t IV[8]
Definition: blake3_impl.h:85
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::SCCPSolver::trackValueOfGlobalVariable
void trackValueOfGlobalVariable(GlobalVariable *GV)
trackValueOfGlobalVariable - Clients can use this method to inform the SCCPSolver that it should trac...
Definition: SCCPSolver.cpp:1535
EnableSpecializationForLiteralConstant
static cl::opt< bool > EnableSpecializationForLiteralConstant("function-specialization-for-literal-constant", cl::init(false), cl::Hidden, cl::desc("Enable specialization of functions that take a literal constant " "as an argument."))
llvm::InlineCost::isVariable
bool isVariable() const
Definition: InlineCost.h:139
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::SCCPSolver::solve
void solve()
Solve - Solve for constants and executable blocks.
Definition: SCCPSolver.cpp:1559
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:59
ForceFunctionSpecialization
static cl::opt< bool > ForceFunctionSpecialization("force-function-specialization", cl::init(false), cl::Hidden, cl::desc("Force function specialization for every call site with a " "constant argument"))
llvm::cl::desc
Definition: CommandLine.h:413
llvm::SCCPSolver::addTrackedFunction
void addTrackedFunction(Function *F)
addTrackedFunction - If the SCCP solver is supposed to track calls into and out of the specified func...
Definition: SCCPSolver.cpp:1539
ValueLattice.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::SCCPSolver::getArgumentTrackedFunctions
SmallPtrSetImpl< Function * > & getArgumentTrackedFunctions()
Return a reference to the set of argument tracked functions.
Definition: SCCPSolver.cpp:1610
isOverdefined
static bool isOverdefined(const ValueLatticeElement &LV)
Definition: FunctionSpecialization.cpp:137
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43