LLVM  14.0.0git
PGOMemOPSizeOpt.cpp
Go to the documentation of this file.
1 //===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the transformation that optimizes memory intrinsics
10 // such as memcpy using the size value profile. When memory intrinsic size
11 // value profile metadata is available, a single memory intrinsic is expanded
12 // to a sequence of guarded specialized versions that are called with the
13 // hottest size(s), for later expansion into more optimal inline sequences.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/InstVisitor.h"
32 #include "llvm/IR/InstrTypes.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/LLVMContext.h"
36 #include "llvm/IR/PassManager.h"
37 #include "llvm/IR/Type.h"
38 #include "llvm/InitializePasses.h"
39 #include "llvm/Pass.h"
40 #include "llvm/PassRegistry.h"
42 #define INSTR_PROF_VALUE_PROF_MEMOP_API
44 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Debug.h"
49 #include "llvm/Support/WithColor.h"
53 #include <cassert>
54 #include <cstdint>
55 #include <vector>
56 
57 using namespace llvm;
58 
59 #define DEBUG_TYPE "pgo-memop-opt"
60 
61 STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
62 STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
63 
64 // The minimum call count to optimize memory intrinsic calls.
65 static cl::opt<unsigned>
66  MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
67  cl::init(1000),
68  cl::desc("The minimum count to optimize memory "
69  "intrinsic calls"));
70 
71 // Command line option to disable memory intrinsic optimization. The default is
72 // false. This is for debug purpose.
73 static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
74  cl::Hidden, cl::desc("Disable optimize"));
75 
76 // The percent threshold to optimize memory intrinsic calls.
77 static cl::opt<unsigned>
78  MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
80  cl::desc("The percentage threshold for the "
81  "memory intrinsic calls optimization"));
82 
83 // Maximum number of versions for optimizing memory intrinsic call.
84 static cl::opt<unsigned>
85  MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
87  cl::desc("The max version for the optimized memory "
88  " intrinsic calls"));
89 
90 // Scale the counts from the annotation using the BB count value.
91 static cl::opt<bool>
92  MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
93  cl::desc("Scale the memop size counts using the basic "
94  " block count value"));
95 
97  MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true),
98  cl::Hidden,
99  cl::desc("Size-specialize memcmp and bcmp calls"));
100 
101 static cl::opt<unsigned>
102  MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128),
103  cl::desc("Optimize the memop size <= this value"));
104 
105 namespace {
106 class PGOMemOPSizeOptLegacyPass : public FunctionPass {
107 public:
108  static char ID;
109 
110  PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
112  }
113 
114  StringRef getPassName() const override { return "PGOMemOPSize"; }
115 
116 private:
117  bool runOnFunction(Function &F) override;
118  void getAnalysisUsage(AnalysisUsage &AU) const override {
124  }
125 };
126 } // end anonymous namespace
127 
129 INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
130  "Optimize memory intrinsic using its size value profile",
131  false, false)
134 INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
135  "Optimize memory intrinsic using its size value profile",
136  false, false)
137 
139  return new PGOMemOPSizeOptLegacyPass();
140 }
141 
142 namespace {
143 
144 static const char *getMIName(const MemIntrinsic *MI) {
145  switch (MI->getIntrinsicID()) {
146  case Intrinsic::memcpy:
147  return "memcpy";
148  case Intrinsic::memmove:
149  return "memmove";
150  case Intrinsic::memset:
151  return "memset";
152  default:
153  return "unknown";
154  }
155 }
156 
157 // A class that abstracts a memop (memcpy, memmove, memset, memcmp and bcmp).
158 struct MemOp {
159  Instruction *I;
160  MemOp(MemIntrinsic *MI) : I(MI) {}
161  MemOp(CallInst *CI) : I(CI) {}
162  MemIntrinsic *asMI() { return dyn_cast<MemIntrinsic>(I); }
163  CallInst *asCI() { return cast<CallInst>(I); }
164  MemOp clone() {
165  if (auto MI = asMI())
166  return MemOp(cast<MemIntrinsic>(MI->clone()));
167  return MemOp(cast<CallInst>(asCI()->clone()));
168  }
169  Value *getLength() {
170  if (auto MI = asMI())
171  return MI->getLength();
172  return asCI()->getArgOperand(2);
173  }
174  void setLength(Value *Length) {
175  if (auto MI = asMI())
176  return MI->setLength(Length);
177  asCI()->setArgOperand(2, Length);
178  }
179  StringRef getFuncName() {
180  if (auto MI = asMI())
181  return MI->getCalledFunction()->getName();
182  return asCI()->getCalledFunction()->getName();
183  }
184  bool isMemmove() {
185  if (auto MI = asMI())
186  if (MI->getIntrinsicID() == Intrinsic::memmove)
187  return true;
188  return false;
189  }
190  bool isMemcmp(TargetLibraryInfo &TLI) {
191  LibFunc Func;
192  if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
193  Func == LibFunc_memcmp) {
194  return true;
195  }
196  return false;
197  }
198  bool isBcmp(TargetLibraryInfo &TLI) {
199  LibFunc Func;
200  if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
201  Func == LibFunc_bcmp) {
202  return true;
203  }
204  return false;
205  }
206  const char *getName(TargetLibraryInfo &TLI) {
207  if (auto MI = asMI())
208  return getMIName(MI);
209  LibFunc Func;
210  if (TLI.getLibFunc(*asCI(), Func)) {
211  if (Func == LibFunc_memcmp)
212  return "memcmp";
213  if (Func == LibFunc_bcmp)
214  return "bcmp";
215  }
216  llvm_unreachable("Must be MemIntrinsic or memcmp/bcmp CallInst");
217  return nullptr;
218  }
219 };
220 
221 class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
222 public:
223  MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
225  TargetLibraryInfo &TLI)
226  : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) {
227  ValueDataArray =
228  std::make_unique<InstrProfValueData[]>(INSTR_PROF_NUM_BUCKETS);
229  }
230  bool isChanged() const { return Changed; }
231  void perform() {
232  WorkList.clear();
233  visit(Func);
234 
235  for (auto &MO : WorkList) {
236  ++NumOfPGOMemOPAnnotate;
237  if (perform(MO)) {
238  Changed = true;
239  ++NumOfPGOMemOPOpt;
240  LLVM_DEBUG(dbgs() << "MemOP call: " << MO.getFuncName()
241  << "is Transformed.\n");
242  }
243  }
244  }
245 
246  void visitMemIntrinsic(MemIntrinsic &MI) {
247  Value *Length = MI.getLength();
248  // Not perform on constant length calls.
249  if (isa<ConstantInt>(Length))
250  return;
251  WorkList.push_back(MemOp(&MI));
252  }
253 
254  void visitCallInst(CallInst &CI) {
255  LibFunc Func;
256  if (TLI.getLibFunc(CI, Func) &&
257  (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
258  !isa<ConstantInt>(CI.getArgOperand(2))) {
259  WorkList.push_back(MemOp(&CI));
260  }
261  }
262 
263 private:
264  Function &Func;
267  DominatorTree *DT;
268  TargetLibraryInfo &TLI;
269  bool Changed;
270  std::vector<MemOp> WorkList;
271  // The space to read the profile annotation.
272  std::unique_ptr<InstrProfValueData[]> ValueDataArray;
273  bool perform(MemOp MO);
274 };
275 
276 static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
277  assert(Count <= TotalCount);
278  if (Count < MemOPCountThreshold)
279  return false;
280  if (Count < TotalCount * MemOPPercentThreshold / 100)
281  return false;
282  return true;
283 }
284 
285 static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
286  uint64_t Denom) {
287  if (!MemOPScaleCount)
288  return Count;
289  bool Overflowed;
290  uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
291  return ScaleCount / Denom;
292 }
293 
294 bool MemOPSizeOpt::perform(MemOp MO) {
295  assert(MO.I);
296  if (MO.isMemmove())
297  return false;
298  if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI)))
299  return false;
300 
301  uint32_t NumVals, MaxNumVals = INSTR_PROF_NUM_BUCKETS;
302  uint64_t TotalCount;
303  if (!getValueProfDataFromInst(*MO.I, IPVK_MemOPSize, MaxNumVals,
304  ValueDataArray.get(), NumVals, TotalCount))
305  return false;
306 
307  uint64_t ActualCount = TotalCount;
308  uint64_t SavedTotalCount = TotalCount;
309  if (MemOPScaleCount) {
310  auto BBEdgeCount = BFI.getBlockProfileCount(MO.I->getParent());
311  if (!BBEdgeCount)
312  return false;
313  ActualCount = *BBEdgeCount;
314  }
315 
316  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
317  LLVM_DEBUG(dbgs() << "Read one memory intrinsic profile with count "
318  << ActualCount << "\n");
319  LLVM_DEBUG(
320  for (auto &VD
321  : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
322 
323  if (ActualCount < MemOPCountThreshold)
324  return false;
325  // Skip if the total value profiled count is 0, in which case we can't
326  // scale up the counts properly (and there is no profitable transformation).
327  if (TotalCount == 0)
328  return false;
329 
330  TotalCount = ActualCount;
331  if (MemOPScaleCount)
332  LLVM_DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
333  << " denominator = " << SavedTotalCount << "\n");
334 
335  // Keeping track of the count of the default case:
336  uint64_t RemainCount = TotalCount;
337  uint64_t SavedRemainCount = SavedTotalCount;
339  SmallVector<uint64_t, 16> CaseCounts;
340  uint64_t MaxCount = 0;
341  unsigned Version = 0;
342  int64_t LastV = -1;
343  // Default case is in the front -- save the slot here.
344  CaseCounts.push_back(0);
346  for (auto I = VDs.begin(), E = VDs.end(); I != E; ++I) {
347  auto &VD = *I;
348  int64_t V = VD.Value;
349  uint64_t C = VD.Count;
350  if (MemOPScaleCount)
351  C = getScaledCount(C, ActualCount, SavedTotalCount);
352 
353  if (!InstrProfIsSingleValRange(V) || V > MemOpMaxOptSize) {
354  RemainingVDs.push_back(VD);
355  continue;
356  }
357 
358  // ValueCounts are sorted on the count. Break at the first un-profitable
359  // value.
360  if (!isProfitable(C, RemainCount)) {
361  RemainingVDs.insert(RemainingVDs.end(), I, E);
362  break;
363  }
364 
365  if (V == LastV) {
366  LLVM_DEBUG(dbgs() << "Invalid Profile Data in Function " << Func.getName()
367  << ": Two consecutive, identical values in MemOp value"
368  "counts.\n");
369  return false;
370  }
371 
372  LastV = V;
373 
374  SizeIds.push_back(V);
375  CaseCounts.push_back(C);
376  if (C > MaxCount)
377  MaxCount = C;
378 
379  assert(RemainCount >= C);
380  RemainCount -= C;
381  assert(SavedRemainCount >= VD.Count);
382  SavedRemainCount -= VD.Count;
383 
384  if (++Version >= MemOPMaxVersion && MemOPMaxVersion != 0) {
385  RemainingVDs.insert(RemainingVDs.end(), I + 1, E);
386  break;
387  }
388  }
389 
390  if (Version == 0)
391  return false;
392 
393  CaseCounts[0] = RemainCount;
394  if (RemainCount > MaxCount)
395  MaxCount = RemainCount;
396 
397  uint64_t SumForOpt = TotalCount - RemainCount;
398 
399  LLVM_DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
400  << " Versions (covering " << SumForOpt << " out of "
401  << TotalCount << ")\n");
402 
403  // mem_op(..., size)
404  // ==>
405  // switch (size) {
406  // case s1:
407  // mem_op(..., s1);
408  // goto merge_bb;
409  // case s2:
410  // mem_op(..., s2);
411  // goto merge_bb;
412  // ...
413  // default:
414  // mem_op(..., size);
415  // goto merge_bb;
416  // }
417  // merge_bb:
418 
419  BasicBlock *BB = MO.I->getParent();
420  LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
421  LLVM_DEBUG(dbgs() << *BB << "\n");
422  auto OrigBBFreq = BFI.getBlockFreq(BB);
423 
424  BasicBlock *DefaultBB = SplitBlock(BB, MO.I, DT);
425  BasicBlock::iterator It(*MO.I);
426  ++It;
427  assert(It != DefaultBB->end());
428  BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
429  MergeBB->setName("MemOP.Merge");
430  BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
431  DefaultBB->setName("MemOP.Default");
432 
434  auto &Ctx = Func.getContext();
435  IRBuilder<> IRB(BB);
436  BB->getTerminator()->eraseFromParent();
437  Value *SizeVar = MO.getLength();
438  SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
439  Type *MemOpTy = MO.I->getType();
440  PHINode *PHI = nullptr;
441  if (!MemOpTy->isVoidTy()) {
442  // Insert a phi for the return values at the merge block.
443  IRBuilder<> IRBM(MergeBB->getFirstNonPHI());
444  PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1, "MemOP.RVMerge");
445  MO.I->replaceAllUsesWith(PHI);
446  PHI->addIncoming(MO.I, DefaultBB);
447  }
448 
449  // Clear the value profile data.
450  MO.I->setMetadata(LLVMContext::MD_prof, nullptr);
451  // If all promoted, we don't need the MD.prof metadata.
452  if (SavedRemainCount > 0 || Version != NumVals) {
453  // Otherwise we need update with the un-promoted records back.
454  ArrayRef<InstrProfValueData> RemVDs(RemainingVDs);
455  annotateValueSite(*Func.getParent(), *MO.I, RemVDs, SavedRemainCount,
456  IPVK_MemOPSize, NumVals);
457  }
458 
459  LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
460 
461  std::vector<DominatorTree::UpdateType> Updates;
462  if (DT)
463  Updates.reserve(2 * SizeIds.size());
464 
465  for (uint64_t SizeId : SizeIds) {
466  BasicBlock *CaseBB = BasicBlock::Create(
467  Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
468  MemOp NewMO = MO.clone();
469  // Fix the argument.
470  auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType());
471  assert(SizeType && "Expected integer type size argument.");
472  ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
473  NewMO.setLength(CaseSizeId);
474  CaseBB->getInstList().push_back(NewMO.I);
475  IRBuilder<> IRBCase(CaseBB);
476  IRBCase.CreateBr(MergeBB);
477  SI->addCase(CaseSizeId, CaseBB);
478  if (!MemOpTy->isVoidTy())
479  PHI->addIncoming(NewMO.I, CaseBB);
480  if (DT) {
481  Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
482  Updates.push_back({DominatorTree::Insert, BB, CaseBB});
483  }
484  LLVM_DEBUG(dbgs() << *CaseBB << "\n");
485  }
486  DTU.applyUpdates(Updates);
487  Updates.clear();
488 
489  setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
490 
491  LLVM_DEBUG(dbgs() << *BB << "\n");
492  LLVM_DEBUG(dbgs() << *DefaultBB << "\n");
493  LLVM_DEBUG(dbgs() << *MergeBB << "\n");
494 
495  ORE.emit([&]() {
496  using namespace ore;
497  return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MO.I)
498  << "optimized " << NV("Memop", MO.getName(TLI)) << " with count "
499  << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount)
500  << " for " << NV("Versions", Version) << " versions";
501  });
502 
503  return true;
504 }
505 } // namespace
506 
509  DominatorTree *DT, TargetLibraryInfo &TLI) {
510  if (DisableMemOPOPT)
511  return false;
512 
513  if (F.hasFnAttribute(Attribute::OptimizeForSize))
514  return false;
515  MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT, TLI);
516  MemOPSizeOpt.perform();
517  return MemOPSizeOpt.isChanged();
518 }
519 
522  getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
523  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
524  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
525  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
526  TargetLibraryInfo &TLI =
527  getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
528  return PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
529 }
530 
531 namespace llvm {
533 
539  auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
540  bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
541  if (!Changed)
542  return PreservedAnalyses::all();
543  auto PA = PreservedAnalyses();
544  PA.preserve<DominatorTreeAnalysis>();
545  return PA;
546 }
547 } // namespace llvm
llvm::SaturatingMultiply
std::enable_if_t< std::is_unsigned< T >::value, T > SaturatingMultiply(T X, T Y, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, of type T.
Definition: MathExtras.h:820
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
Instrumentation.h
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
getName
static StringRef getName(Value *V)
Definition: ProvenanceAnalysisEvaluator.cpp:42
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
MathExtras.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", "Optimize memory intrinsic using its size value profile", false, false) INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
llvm::Function
Definition: Function.h:62
StringRef.h
Pass.h
llvm::BlockFrequencyInfoWrapperPass
Legacy analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:138
MemOpMaxOptSize
static cl::opt< unsigned > MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), cl::desc("Optimize the memop size <= this value"))
llvm::SmallVector< uint64_t, 16 >
Statistic.h
ErrorHandling.h
llvm::IRBuilder<>
DomTreeUpdater.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
GlobalsModRef.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::MemOp
Definition: TargetLowering.h:112
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::DominatorTreeBase< BasicBlock, false >::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
llvm::MemIntrinsic
This is the common base class for memset/memcpy/memmove.
Definition: IntrinsicInst.h:874
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
intrinsic
QP Compare Ordered outs ins xscmpudp No intrinsic
Definition: README_P9.txt:303
DEBUG_TYPE
#define DEBUG_TYPE
Definition: PGOMemOPSizeOpt.cpp:59
PassRegistry.h
MemOPCountThreshold
static cl::opt< unsigned > MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(1000), cl::desc("The minimum count to optimize memory " "intrinsic calls"))
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
opt
pgo memop opt
Definition: PGOMemOPSizeOpt.cpp:134
F
#define F(x, y, z)
Definition: MD5.cpp:56
InstrProfData.inc
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
PGOMemOPSizeOptImpl
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, DominatorTree *DT, TargetLibraryInfo &TLI)
Definition: PGOMemOPSizeOpt.cpp:507
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
profile
pgo instr Read PGO instrumentation profile
Definition: PGOInstrumentation.cpp:501
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:34
MemOPPercentThreshold
static cl::opt< unsigned > MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), cl::Hidden, cl::ZeroOrMore, cl::desc("The percentage threshold for the " "memory intrinsic calls optimization"))
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
Twine.h
InstrTypes.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:142
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:291
llvm::Instruction
Definition: Instruction.h:45
InstrProf.h
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:376
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
llvm::createPGOMemOPSizeOptLegacyPass
FunctionPass * createPGOMemOPSizeOptLegacyPass()
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:216
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
Type.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
llvm::PGOMemOPSizeOptID
char & PGOMemOPSizeOptID
Definition: PGOMemOPSizeOpt.cpp:532
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:465
uint64_t
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2816
MemOPScaleCount
static cl::opt< bool > MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, cl::desc("Scale the memop size counts using the basic " " block count value"))
PGOInstrumentation.h
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
ArrayRef.h
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
SI
StandardInstrumentations SI(Debug, VerifyEach)
MemOPMaxVersion
static cl::opt< unsigned > MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("The max version for the optimized memory " " intrinsic calls"))
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:138
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1562
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
InstVisitor.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
uint32_t
llvm::InstVisitor
Base class for instruction visitors.
Definition: InstVisitor.h:79
llvm::DomTreeUpdater::UpdateStrategy::Eager
@ Eager
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:431
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:988
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
WithColor.h
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
DisableMemOPOPT
static cl::opt< bool > DisableMemOPOPT("disable-memop-opt", cl::init(false), cl::Hidden, cl::desc("Disable optimize"))
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:802
Casting.h
Function.h
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
Instructions.h
Version
uint64_t Version
Definition: RawMemProfReader.cpp:25
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1343
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:143
llvm::PHINode
Definition: Instructions.h:2666
llvm::setProfMetadata
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
Definition: PGOInstrumentation.cpp:2039
DerivedTypes.h
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1487
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getValueProfDataFromInst
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
Definition: InstrProf.cpp:1033
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3245
llvm::cl::desc
Definition: CommandLine.h:412
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:839
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
MemOPOptMemcmpBcmp
cl::opt< bool > MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), cl::Hidden, cl::desc("Size-specialize memcmp and bcmp calls"))
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:440
llvm::initializePGOMemOPSizeOptLegacyPassPass
void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry &)
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::PGOMemOPSizeOpt::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: PGOMemOPSizeOpt.cpp:534
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:773