LLVM  12.0.0git
PGOMemOPSizeOpt.cpp
Go to the documentation of this file.
1 //===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the transformation that optimizes memory intrinsics
10 // such as memcpy using the size value profile. When memory intrinsic size
11 // value profile metadata is available, a single memory intrinsic is expanded
12 // to a sequence of guarded specialized versions that are called with the
13 // hottest size(s), for later expansion into more optimal inline sequences.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
25 #include "llvm/IR/BasicBlock.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/IRBuilder.h"
30 #include "llvm/IR/InstVisitor.h"
31 #include "llvm/IR/InstrTypes.h"
32 #include "llvm/IR/Instruction.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/LLVMContext.h"
35 #include "llvm/IR/PassManager.h"
36 #include "llvm/IR/Type.h"
37 #include "llvm/InitializePasses.h"
38 #include "llvm/Pass.h"
39 #include "llvm/PassRegistry.h"
41 #define INSTR_PROF_VALUE_PROF_MEMOP_API
43 #include "llvm/Support/Casting.h"
45 #include "llvm/Support/Debug.h"
51 #include <cassert>
52 #include <cstdint>
53 #include <vector>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "pgo-memop-opt"
58 
59 STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
60 STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
61 
62 // The minimum call count to optimize memory intrinsic calls.
63 static cl::opt<unsigned>
64  MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
65  cl::init(1000),
66  cl::desc("The minimum count to optimize memory "
67  "intrinsic calls"));
68 
69 // Command line option to disable memory intrinsic optimization. The default is
70 // false. This is for debug purpose.
71 static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
72  cl::Hidden, cl::desc("Disable optimize"));
73 
74 // The percent threshold to optimize memory intrinsic calls.
75 static cl::opt<unsigned>
76  MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
78  cl::desc("The percentage threshold for the "
79  "memory intrinsic calls optimization"));
80 
81 // Maximum number of versions for optimizing memory intrinsic call.
82 static cl::opt<unsigned>
83  MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
85  cl::desc("The max version for the optimized memory "
86  " intrinsic calls"));
87 
88 // Scale the counts from the annotation using the BB count value.
89 static cl::opt<bool>
90  MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
91  cl::desc("Scale the memop size counts using the basic "
92  " block count value"));
93 
94 // FIXME: These are to be removed after switching to the new memop value
95 // profiling.
96 // This option sets the range of precise profile memop sizes.
98 
99 // This option sets the value that groups large memop sizes
101 
103 
105  MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true),
106  cl::Hidden,
107  cl::desc("Size-specialize memcmp and bcmp calls"));
108 
109 static cl::opt<unsigned>
110  MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128),
111  cl::desc("Optimize the memop size <= this value"));
112 
113 namespace {
// Legacy pass-manager wrapper: a FunctionPass whose runOnFunction (defined
// out of line) drives the memop size optimization. It reports its name as
// "PGOMemOPSize".
// NOTE(review): the bodies of the constructor and of getAnalysisUsage are not
// visible in this listing (source lines 119 and 127-131 are absent) — confirm
// against the real file before editing them.
114 class PGOMemOPSizeOptLegacyPass : public FunctionPass {
115 public:
// Pass identifier handed to the FunctionPass base-class constructor.
116  static char ID;
117 
118  PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
120  }
121 
122  StringRef getPassName() const override { return "PGOMemOPSize"; }
123 
124 private:
125  bool runOnFunction(Function &F) override;
126  void getAnalysisUsage(AnalysisUsage &AU) const override {
132  }
133 };
134 } // end anonymous namespace
135 
// Registers the legacy pass under the command-line name "pgo-memop-opt".
// NOTE(review): whatever sat between the BEGIN and END macros (source lines
// 140-141) is absent from this listing — presumably the analysis dependency
// declarations; confirm against the real file.
137 INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
138  "Optimize memory intrinsic using its size value profile",
139  false, false)
142 INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
143  "Optimize memory intrinsic using its size value profile",
144  false, false)
145 
// Tail of a factory that returns a freshly allocated legacy pass.
// NOTE(review): the function header (source line 146) is absent from this
// listing — presumably createPGOMemOPSizeOptLegacyPass(); confirm.
147  return new PGOMemOPSizeOptLegacyPass();
148 }
149 
150 namespace {
151 
152 static const char *getMIName(const MemIntrinsic *MI) {
153  switch (MI->getIntrinsicID()) {
154  case Intrinsic::memcpy:
155  return "memcpy";
156  case Intrinsic::memmove:
157  return "memmove";
158  case Intrinsic::memset:
159  return "memset";
160  default:
161  return "unknown";
162  }
163 }
164 
165 // A class that abstracts a memop (memcpy, memmove, memset, memcmp and bcmp).
166 struct MemOp {
167  Instruction *I;
168  MemOp(MemIntrinsic *MI) : I(MI) {}
169  MemOp(CallInst *CI) : I(CI) {}
170  MemIntrinsic *asMI() { return dyn_cast<MemIntrinsic>(I); }
171  CallInst *asCI() { return cast<CallInst>(I); }
172  MemOp clone() {
173  if (auto MI = asMI())
174  return MemOp(cast<MemIntrinsic>(MI->clone()));
175  return MemOp(cast<CallInst>(asCI()->clone()));
176  }
177  Value *getLength() {
178  if (auto MI = asMI())
179  return MI->getLength();
180  return asCI()->getArgOperand(2);
181  }
182  void setLength(Value *Length) {
183  if (auto MI = asMI())
184  return MI->setLength(Length);
185  asCI()->setArgOperand(2, Length);
186  }
187  StringRef getFuncName() {
188  if (auto MI = asMI())
189  return MI->getCalledFunction()->getName();
190  return asCI()->getCalledFunction()->getName();
191  }
192  bool isMemmove() {
193  if (auto MI = asMI())
194  if (MI->getIntrinsicID() == Intrinsic::memmove)
195  return true;
196  return false;
197  }
198  bool isMemcmp(TargetLibraryInfo &TLI) {
199  LibFunc Func;
200  if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
201  Func == LibFunc_memcmp) {
202  return true;
203  }
204  return false;
205  }
206  bool isBcmp(TargetLibraryInfo &TLI) {
207  LibFunc Func;
208  if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
209  Func == LibFunc_bcmp) {
210  return true;
211  }
212  return false;
213  }
214  const char *getName(TargetLibraryInfo &TLI) {
215  if (auto MI = asMI())
216  return getMIName(MI);
217  LibFunc Func;
218  if (TLI.getLibFunc(*asCI(), Func)) {
219  if (Func == LibFunc_memcmp)
220  return "memcmp";
221  if (Func == LibFunc_bcmp)
222  return "bcmp";
223  }
224  llvm_unreachable("Must be MemIntrinsic or memcmp/bcmp CallInst");
225  return nullptr;
226  }
227 };
228 
229 class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
230 public:
// Builds the optimizer for one function. Stores the analyses it is given,
// allocates the buffer that value-profile records are read into
// (MemOPMaxVersion + 2 entries), and hands the MemOPSizeRange option to
// getMemOPSizeRangeFromOption to fill PreciseRangeStart / PreciseRangeLast.
// NOTE(review): the initializer list binds ORE and DT, but the parameter line
// declaring them (source line 232) is absent from this listing — confirm
// against the real file.
231  MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
233  TargetLibraryInfo &TLI)
234  : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) {
235  ValueDataArray =
236  std::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
237  // Get the MemOPSize range information from option MemOPSizeRange,
238  getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart,
239  PreciseRangeLast);
240  }
// True once perform() has rewritten at least one memop in the function.
241  bool isChanged() const { return Changed; }
242  void perform() {
243  WorkList.clear();
244  visit(Func);
245 
246  for (auto &MO : WorkList) {
247  ++NumOfPGOMemOPAnnotate;
248  if (perform(MO)) {
249  Changed = true;
250  ++NumOfPGOMemOPOpt;
251  LLVM_DEBUG(dbgs() << "MemOP call: " << MO.getFuncName()
252  << "is Transformed.\n");
253  }
254  }
255  }
256 
257  void visitMemIntrinsic(MemIntrinsic &MI) {
258  Value *Length = MI.getLength();
259  // Not perform on constant length calls.
260  if (dyn_cast<ConstantInt>(Length))
261  return;
262  WorkList.push_back(MemOp(&MI));
263  }
264 
265  void visitCallInst(CallInst &CI) {
266  LibFunc Func;
267  if (TLI.getLibFunc(CI, Func) &&
268  (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
269  !dyn_cast<ConstantInt>(CI.getArgOperand(2))) {
270  WorkList.push_back(MemOp(&CI));
271  }
272  }
273 
274 private:
275  Function &Func;
278  DominatorTree *DT;
279  TargetLibraryInfo &TLI;
280  bool Changed;
281  std::vector<MemOp> WorkList;
282  // FIXME: These are to be removed after switching to the new memop value
283  // profiling.
284  // Start of the precise range.
285  int64_t PreciseRangeStart;
286  // Last value of the precise range.
287  int64_t PreciseRangeLast;
288  // The space to read the profile annotation.
289  std::unique_ptr<InstrProfValueData[]> ValueDataArray;
290  bool perform(MemOp MO);
291 
292  // FIXME: This is to be removed after switching to the new memop value
293  // profiling.
294  // This kind shows which group the value falls in. For PreciseValue, we have
295  // the profile count for that value. LargeGroup groups the values that are in
296  // range [LargeValue, +inf). NonLargeGroup groups the rest of values.
297  enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup };
298 
299  MemOPSizeKind getMemOPSizeKind(int64_t Value) const {
300  if (Value == MemOPSizeLarge && MemOPSizeLarge != 0)
301  return LargeGroup;
302  if (Value == PreciseRangeLast + 1)
303  return NonLargeGroup;
304  return PreciseValue;
305  }
306 };
307 
308 static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
309  assert(Count <= TotalCount);
310  if (Count < MemOPCountThreshold)
311  return false;
312  if (Count < TotalCount * MemOPPercentThreshold / 100)
313  return false;
314  return true;
315 }
316 
317 static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
318  uint64_t Denom) {
319  if (!MemOPScaleCount)
320  return Count;
321  bool Overflowed;
322  uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
323  return ScaleCount / Denom;
324 }
325 
// Tries to size-specialize a single memop call site using its value-profile
// annotation. On success the call is replaced by a switch on the size operand
// with one cloned call per selected constant size plus the original call as
// the default case, and true is returned. Returns false, leaving the IR
// untouched, when the call is a memmove, memcmp/bcmp handling is disabled,
// no usable profile is attached, the counts are below the thresholds, or no
// size qualifies.
// NOTE(review): this listing is missing several source lines inside this
// function (333, 370, 451, 454); the notes below mark where names are used
// without a visible declaration.
326 bool MemOPSizeOpt::perform(MemOp MO) {
327  assert(MO.I);
328  if (MO.isMemmove())
329  return false;
330  if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI)))
331  return false;
332 
// NOTE(review): NumVals and MaxNumPromotions are used just below but their
// declaration is not visible here (source line 333 is absent) — confirm.
334  uint64_t TotalCount;
335  if (!getValueProfDataFromInst(*MO.I, IPVK_MemOPSize, MaxNumPromotions,
336  ValueDataArray.get(), NumVals, TotalCount))
337  return false;
338 
// ActualCount is the count the per-size counts get rescaled against: the
// profiled total by default, or the enclosing block's profile count when
// scaling is enabled (giving up if BFI has no count for the block).
339  uint64_t ActualCount = TotalCount;
340  uint64_t SavedTotalCount = TotalCount;
341  if (MemOPScaleCount) {
342  auto BBEdgeCount = BFI.getBlockProfileCount(MO.I->getParent());
343  if (!BBEdgeCount)
344  return false;
345  ActualCount = *BBEdgeCount;
346  }
347 
348  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
349  LLVM_DEBUG(dbgs() << "Read one memory intrinsic profile with count "
350  << ActualCount << "\n");
351  LLVM_DEBUG(
352  for (auto &VD
353  : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
354 
355  if (ActualCount < MemOPCountThreshold)
356  return false;
357  // Skip if the total value profiled count is 0, in which case we can't
358  // scale up the counts properly (and there is no profitable transformation).
359  if (TotalCount == 0)
360  return false;
361 
// From here on TotalCount is the (possibly block-scaled) count, while
// SavedTotalCount keeps the raw profiled total used as the scaling
// denominator.
362  TotalCount = ActualCount;
363  if (MemOPScaleCount)
364  LLVM_DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
365  << " denominator = " << SavedTotalCount << "\n");
366 
367  // Keeping track of the count of the default case:
// RemainCount is tracked in scaled units (it becomes the default-case weight);
// SavedRemainCount is tracked in raw profile units (it is written back into
// the remaining annotation).
368  uint64_t RemainCount = TotalCount;
369  uint64_t SavedRemainCount = SavedTotalCount;
// NOTE(review): SizeIds is filled and iterated below but its declaration is
// not visible here (source line 370 is absent) — confirm.
371  SmallVector<uint64_t, 16> CaseCounts;
372  uint64_t MaxCount = 0;
373  unsigned Version = 0;
374  // Default case is in the front -- save the slot here.
375  CaseCounts.push_back(0);
// Walk the profiled sizes and pick the ones that get a dedicated case.
376  for (auto &VD : VDs) {
377  int64_t V = VD.Value;
378  uint64_t C = VD.Count;
379  if (MemOPScaleCount)
380  C = getScaledCount(C, ActualCount, SavedTotalCount);
381 
382  if (UseOldMemOpValueProf) {
383  // Only care precise value here.
384  if (getMemOPSizeKind(V) != PreciseValue)
385  continue;
386  } else if (!InstrProfIsSingleValRange(V) || V > MemOpMaxOptSize)
387  continue;
388 
389  // ValueCounts are sorted on the count. Break at the first un-profitable
390  // value.
391  if (!isProfitable(C, RemainCount))
392  break;
393 
394  SizeIds.push_back(V);
395  CaseCounts.push_back(C);
396  if (C > MaxCount)
397  MaxCount = C;
398 
399  assert(RemainCount >= C);
400  RemainCount -= C;
401  assert(SavedRemainCount >= VD.Count);
402  SavedRemainCount -= VD.Count;
403 
// NOTE(review): Version is incremented before the comparison and the test is
// `>`, so the loop only stops after MemOPMaxVersion + 1 sizes have been
// accepted — confirm whether `>=` was intended. A limit of 0 means no cap.
404  if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0)
405  break;
406  }
407 
408  if (Version == 0)
409  return false;
410 
// Slot 0 was reserved for the default case; it gets whatever count is left.
411  CaseCounts[0] = RemainCount;
412  if (RemainCount > MaxCount)
413  MaxCount = RemainCount;
414 
415  uint64_t SumForOpt = TotalCount - RemainCount;
416 
417  LLVM_DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
418  << " Versions (covering " << SumForOpt << " out of "
419  << TotalCount << ")\n");
420 
421  // mem_op(..., size)
422  // ==>
423  // switch (size) {
424  // case s1:
425  // mem_op(..., s1);
426  // goto merge_bb;
427  // case s2:
428  // mem_op(..., s2);
429  // goto merge_bb;
430  // ...
431  // default:
432  // mem_op(..., size);
433  // goto merge_bb;
434  // }
435  // merge_bb:
436 
437  BasicBlock *BB = MO.I->getParent();
438  LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
439  LLVM_DEBUG(dbgs() << *BB << "\n");
440  auto OrigBBFreq = BFI.getBlockFreq(BB);
441 
// Two splits isolate the original call: BB keeps everything before it,
// DefaultBB holds just the call, and MergeBB receives everything after it.
442  BasicBlock *DefaultBB = SplitBlock(BB, MO.I, DT);
443  BasicBlock::iterator It(*MO.I);
444  ++It;
445  assert(It != DefaultBB->end());
446  BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
447  MergeBB->setName("MemOP.Merge");
448  BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
449  DefaultBB->setName("MemOP.Default");
450 
// NOTE(review): source lines 451 and 454 are absent from this listing. DTU is
// used further down without a visible declaration, and nothing visible here
// removes BB's existing terminator before the switch is appended — confirm
// both against the real file.
452  auto &Ctx = Func.getContext();
453  IRBuilder<> IRB(BB);
455  Value *SizeVar = MO.getLength();
456  SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
457  Type *MemOpTy = MO.I->getType();
458  PHINode *PHI = nullptr;
459  if (!MemOpTy->isVoidTy()) {
460  // Insert a phi for the return values at the merge block.
461  IRBuilder<> IRBM(MergeBB->getFirstNonPHI());
462  PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1, "MemOP.RVMerge");
// Existing users are redirected to the PHI first; only afterwards is the
// original call added as the PHI's incoming value for the default path.
463  MO.I->replaceAllUsesWith(PHI);
464  PHI->addIncoming(MO.I, DefaultBB);
465  }
466 
467  // Clear the value profile data.
468  MO.I->setMetadata(LLVMContext::MD_prof, nullptr);
469  // If all promoted, we don't need the MD.prof metadata.
470  if (SavedRemainCount > 0 || Version != NumVals)
471  // Otherwise we need update with the un-promoted records back.
472  annotateValueSite(*Func.getParent(), *MO.I, VDs.slice(Version),
473  SavedRemainCount, IPVK_MemOPSize, NumVals);
474 
475  LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
476 
477  std::vector<DominatorTree::UpdateType> Updates;
478  if (DT)
479  Updates.reserve(2 * SizeIds.size());
480 
// One new block per selected size: a clone of the call whose length operand
// is the constant size, followed by a branch to MergeBB.
481  for (uint64_t SizeId : SizeIds) {
482  BasicBlock *CaseBB = BasicBlock::Create(
483  Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
484  MemOp NewMO = MO.clone();
485  // Fix the argument.
486  auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType());
487  assert(SizeType && "Expected integer type size argument.");
488  ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
489  NewMO.setLength(CaseSizeId);
490  CaseBB->getInstList().push_back(NewMO.I);
491  IRBuilder<> IRBCase(CaseBB);
492  IRBCase.CreateBr(MergeBB);
493  SI->addCase(CaseSizeId, CaseBB);
494  if (!MemOpTy->isVoidTy())
495  PHI->addIncoming(NewMO.I, CaseBB);
496  if (DT) {
497  Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
498  Updates.push_back({DominatorTree::Insert, BB, CaseBB});
499  }
500  LLVM_DEBUG(dbgs() << *CaseBB << "\n");
501  }
502  DTU.applyUpdates(Updates);
503  Updates.clear();
504 
// Attach the case weights to the switch; CaseCounts[0] is the default case.
505  setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
506 
507  LLVM_DEBUG(dbgs() << *BB << "\n");
508  LLVM_DEBUG(dbgs() << *DefaultBB << "\n");
509  LLVM_DEBUG(dbgs() << *MergeBB << "\n");
510 
511  ORE.emit([&]() {
512  using namespace ore;
513  return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MO.I)
514  << "optimized " << NV("Memop", MO.getName(TLI)) << " with count "
515  << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount)
516  << " for " << NV("Versions", Version) << " versions";
517  });
518 
519  return true;
520 }
521 } // namespace
522 
// Shared driver used by both pass-manager wrappers. Does nothing and returns
// false when the optimization is disabled by option or the function carries
// the OptimizeForSize attribute; otherwise runs MemOPSizeOpt over the function
// and returns whether anything changed.
// NOTE(review): the first lines of the signature (source lines 523-524) are
// absent from this listing; the call sites pass (F, BFI, ORE, DT, TLI).
525  DominatorTree *DT, TargetLibraryInfo &TLI) {
526  if (DisableMemOPOPT)
527  return false;
528 
529  if (F.hasFnAttribute(Attribute::OptimizeForSize))
530  return false;
531  MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT, TLI);
532  MemOPSizeOpt.perform();
533  return MemOPSizeOpt.isChanged();
534 }
535 
// Legacy pass-manager entry point: gathers the analyses and forwards to
// PGOMemOPSizeOptImpl. The dominator tree is optional and is passed as null
// when that analysis is not available.
// NOTE(review): the function header and the start of the BFI statement
// (source lines 536-537) are absent from this listing — confirm.
538  getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
539  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
540  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
541  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
542  TargetLibraryInfo &TLI =
543  getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
544  return PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
545 }
546 
547 namespace llvm {
549 
// New pass-manager entry point: fetches the analyses from FAM and forwards to
// PGOMemOPSizeOptImpl. Only a cached dominator tree is used, so DT may be
// null. If nothing changed every analysis is reported as preserved; otherwise
// only GlobalsAA and the dominator tree are.
// NOTE(review): the function header and the statement that obtains ORE
// (source lines 550-551 and 553) are absent from this listing — confirm.
552  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
554  auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
555  auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
556  bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
557  if (!Changed)
558  return PreservedAnalyses::all();
559  auto PA = PreservedAnalyses();
560  PA.preserve<GlobalsAA>();
561  PA.preserve<DominatorTreeAnalysis>();
562  return PA;
563 }
564 } // namespace llvm
Legacy wrapper pass to provide the GlobalsAAResult object.
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:80
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
LLVM_NODISCARD std::enable_if_t< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type > dyn_cast(const Y &Val)
Definition: Casting.h:334
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Base class for instruction visitors.
Definition: InstVisitor.h:79
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This is the interface for a simple mod/ref and alias analysis over globals.
FunctionPass * createPGOMemOPSizeOptLegacyPass()
#define DEBUG_TYPE
void push_back(const T &Elt)
Definition: SmallVector.h:246
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:981
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
cl::opt< unsigned > MemOPSizeLarge
This class represents a function call, abstracting a target machine's calling convention.
std::enable_if_t< std::is_unsigned< T >::value, T > SaturatingMultiply(T X, T Y, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, of type T.
Definition: MathExtras.h:821
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:330
STATISTIC(NumFunctions, "Total number of functions")
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:233
F(f)
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:150
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
static cl::opt< unsigned > MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), cl::Hidden, cl::ZeroOrMore, cl::desc("The percentage threshold for the " "memory intrinsic calls optimization"))
Value * getLength() const
INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", "Optimize memory intrinsic using its size value profile", false, false) INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1259
AnalysisUsage & addRequired()
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
static StringRef getName(Value *V)
Legacy analysis pass which computes BlockFrequencyInfo.
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:342
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following: ...
This file provides the interface for IR based instrumentation passes (profile-gen, and profile-use).
void setLength(Value *L)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:952
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
pgo memop opt
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:138
void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, int64_t &RangeLast)
Definition: InstrProf.cpp:1117
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:434
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC)
Extract the value profile data from Inst which is annotated with value profile meta data...
Definition: InstrProf.cpp:980
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
Analysis pass providing a never-invalidated alias analysis result.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
Class to represent integer types.
Definition: DerivedTypes.h:40
cl::opt< bool > MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), cl::Hidden, cl::desc("Size-specialize memcmp and bcmp calls"))
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:935
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
static cl::opt< bool > MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, cl::desc("Scale the memop size counts using the basic " " block count value"))
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:354
Analysis pass which computes BlockFrequencyInfo.
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
static cl::opt< unsigned > MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("The max version for the optimized memory " " intrinsic calls"))
iterator end()
Definition: BasicBlock.h:291
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:786
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static cl::opt< bool > DisableMemOPOPT("disable-memop-opt", cl::init(false), cl::Hidden, cl::desc("Disable optimize"))
auto size(R &&Range, std::enable_if_t< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1473
void push_back(pointer val)
Definition: ilist.h:313
cl::opt< bool > UseOldMemOpValueProf
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
pgo instr Read PGO instrumentation profile
static cl::opt< unsigned > MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of promotions for a single indirect " "call callsite"))
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:270
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1314
char & PGOMemOPSizeOptID
#define I(x, y, z)
Definition: MD5.cpp:59
cl::opt< std::string > MemOPSizeRange
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:788
static cl::opt< unsigned > MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(1000), cl::desc("The minimum count to optimize memory " "intrinsic calls"))
size_t size() const
Definition: SmallVector.h:66
void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry &)
Analysis pass providing the TargetLibraryInfo.
Multiway switch.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:74
OptimizationRemarkEmitter legacy analysis pass.
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, DominatorTree *DT, TargetLibraryInfo &TLI)
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:262
This header defines various interfaces for pass management in LLVM.
static cl::opt< unsigned > MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), cl::desc("Optimize the memop size <= this value"))
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const uint64_t Version
Definition: InstrProf.h:996
The optimization diagnostic interface.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)