LLVM  15.0.0git
PGOMemOPSizeOpt.cpp
Go to the documentation of this file.
1 //===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the transformation that optimizes memory intrinsics
10 // such as memcpy using the size value profile. When memory intrinsic size
11 // value profile metadata is available, a single memory intrinsic is expanded
12 // to a sequence of guarded specialized versions that are called with the
13 // hottest size(s), for later expansion into more optimal inline sequences.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
25 #include "llvm/IR/BasicBlock.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/IRBuilder.h"
30 #include "llvm/IR/InstVisitor.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/LLVMContext.h"
34 #include "llvm/IR/PassManager.h"
35 #include "llvm/IR/Type.h"
37 #define INSTR_PROF_VALUE_PROF_MEMOP_API
39 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/Debug.h"
46 #include <cassert>
47 #include <cstdint>
48 #include <vector>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "pgo-memop-opt"
53 
54 STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
55 STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
56 
57 // The minimum call count to optimize memory intrinsic calls.
58 static cl::opt<unsigned>
59  MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
60  cl::init(1000),
61  cl::desc("The minimum count to optimize memory "
62  "intrinsic calls"));
63 
64 // Command line option to disable memory intrinsic optimization. The default is
65 // false. This is for debugging purposes.
66 static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
67  cl::Hidden, cl::desc("Disable optimize"));
68 
69 // The percent threshold to optimize memory intrinsic calls.
70 static cl::opt<unsigned>
71  MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
73  cl::desc("The percentage threshold for the "
74  "memory intrinsic calls optimization"));
75 
76 // Maximum number of versions for optimizing memory intrinsic call.
77 static cl::opt<unsigned>
78  MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
80  cl::desc("The max version for the optimized memory "
81  " intrinsic calls"));
82 
83 // Scale the counts from the annotation using the BB count value.
84 static cl::opt<bool>
85  MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
86  cl::desc("Scale the memop size counts using the basic "
87  " block count value"));
88 
90  MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true),
91  cl::Hidden,
92  cl::desc("Size-specialize memcmp and bcmp calls"));
93 
// Upper bound (inclusive) on the profiled size values that may get their own
// specialized case; the default is 128.
94 static cl::opt<unsigned>
95  MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128),
96  cl::desc("Optimize the memop size <= this value"));
97 
98 namespace {
99 
100 static const char *getMIName(const MemIntrinsic *MI) {
101  switch (MI->getIntrinsicID()) {
102  case Intrinsic::memcpy:
103  return "memcpy";
104  case Intrinsic::memmove:
105  return "memmove";
106  case Intrinsic::memset:
107  return "memset";
108  default:
109  return "unknown";
110  }
111 }
112 
113 // A class that abstracts a memop (memcpy, memmove, memset, memcmp and bcmp).
114 struct MemOp {
115  Instruction *I;
116  MemOp(MemIntrinsic *MI) : I(MI) {}
117  MemOp(CallInst *CI) : I(CI) {}
118  MemIntrinsic *asMI() { return dyn_cast<MemIntrinsic>(I); }
119  CallInst *asCI() { return cast<CallInst>(I); }
120  MemOp clone() {
121  if (auto MI = asMI())
122  return MemOp(cast<MemIntrinsic>(MI->clone()));
123  return MemOp(cast<CallInst>(asCI()->clone()));
124  }
125  Value *getLength() {
126  if (auto MI = asMI())
127  return MI->getLength();
128  return asCI()->getArgOperand(2);
129  }
130  void setLength(Value *Length) {
131  if (auto MI = asMI())
132  return MI->setLength(Length);
133  asCI()->setArgOperand(2, Length);
134  }
135  StringRef getFuncName() {
136  if (auto MI = asMI())
137  return MI->getCalledFunction()->getName();
138  return asCI()->getCalledFunction()->getName();
139  }
140  bool isMemmove() {
141  if (auto MI = asMI())
142  if (MI->getIntrinsicID() == Intrinsic::memmove)
143  return true;
144  return false;
145  }
146  bool isMemcmp(TargetLibraryInfo &TLI) {
147  LibFunc Func;
148  if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
149  Func == LibFunc_memcmp) {
150  return true;
151  }
152  return false;
153  }
154  bool isBcmp(TargetLibraryInfo &TLI) {
155  LibFunc Func;
156  if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
157  Func == LibFunc_bcmp) {
158  return true;
159  }
160  return false;
161  }
162  const char *getName(TargetLibraryInfo &TLI) {
163  if (auto MI = asMI())
164  return getMIName(MI);
165  LibFunc Func;
166  if (TLI.getLibFunc(*asCI(), Func)) {
167  if (Func == LibFunc_memcmp)
168  return "memcmp";
169  if (Func == LibFunc_bcmp)
170  return "bcmp";
171  }
172  llvm_unreachable("Must be MemIntrinsic or memcmp/bcmp CallInst");
173  return nullptr;
174  }
175 };
176 
177 class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
178 public:
179  MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
181  TargetLibraryInfo &TLI)
182  : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) {
183  ValueDataArray =
184  std::make_unique<InstrProfValueData[]>(INSTR_PROF_NUM_BUCKETS);
185  }
// Returns true once perform() has transformed at least one memop.
186  bool isChanged() const { return Changed; }
187  void perform() {
188  WorkList.clear();
189  visit(Func);
190 
191  for (auto &MO : WorkList) {
192  ++NumOfPGOMemOPAnnotate;
193  if (perform(MO)) {
194  Changed = true;
195  ++NumOfPGOMemOPOpt;
196  LLVM_DEBUG(dbgs() << "MemOP call: " << MO.getFuncName()
197  << "is Transformed.\n");
198  }
199  }
200  }
201 
202  void visitMemIntrinsic(MemIntrinsic &MI) {
203  Value *Length = MI.getLength();
204  // Not perform on constant length calls.
205  if (isa<ConstantInt>(Length))
206  return;
207  WorkList.push_back(MemOp(&MI));
208  }
209 
210  void visitCallInst(CallInst &CI) {
211  LibFunc Func;
212  if (TLI.getLibFunc(CI, Func) &&
213  (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
214  !isa<ConstantInt>(CI.getArgOperand(2))) {
215  WorkList.push_back(MemOp(&CI));
216  }
217  }
218 
219 private:
220  Function &Func;
223  DominatorTree *DT;
224  TargetLibraryInfo &TLI;
225  bool Changed;
226  std::vector<MemOp> WorkList;
227  // The space to read the profile annotation.
228  std::unique_ptr<InstrProfValueData[]> ValueDataArray;
229  bool perform(MemOp MO);
230 };
231 
232 static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
233  assert(Count <= TotalCount);
234  if (Count < MemOPCountThreshold)
235  return false;
236  if (Count < TotalCount * MemOPPercentThreshold / 100)
237  return false;
238  return true;
239 }
240 
241 static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
242  uint64_t Denom) {
243  if (!MemOPScaleCount)
244  return Count;
245  bool Overflowed;
246  uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
247  return ScaleCount / Denom;
248 }
249 
250 bool MemOPSizeOpt::perform(MemOp MO) {
251  assert(MO.I);
252  if (MO.isMemmove())
253  return false;
254  if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI)))
255  return false;
256 
257  uint32_t NumVals, MaxNumVals = INSTR_PROF_NUM_BUCKETS;
258  uint64_t TotalCount;
259  if (!getValueProfDataFromInst(*MO.I, IPVK_MemOPSize, MaxNumVals,
260  ValueDataArray.get(), NumVals, TotalCount))
261  return false;
262 
263  uint64_t ActualCount = TotalCount;
264  uint64_t SavedTotalCount = TotalCount;
265  if (MemOPScaleCount) {
266  auto BBEdgeCount = BFI.getBlockProfileCount(MO.I->getParent());
267  if (!BBEdgeCount)
268  return false;
269  ActualCount = *BBEdgeCount;
270  }
271 
272  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
273  LLVM_DEBUG(dbgs() << "Read one memory intrinsic profile with count "
274  << ActualCount << "\n");
275  LLVM_DEBUG(
276  for (auto &VD
277  : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
278 
279  if (ActualCount < MemOPCountThreshold)
280  return false;
281  // Skip if the total value profiled count is 0, in which case we can't
282  // scale up the counts properly (and there is no profitable transformation).
283  if (TotalCount == 0)
284  return false;
285 
286  TotalCount = ActualCount;
287  if (MemOPScaleCount)
288  LLVM_DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
289  << " denominator = " << SavedTotalCount << "\n");
290 
291  // Keeping track of the count of the default case:
292  uint64_t RemainCount = TotalCount;
293  uint64_t SavedRemainCount = SavedTotalCount;
295  SmallVector<uint64_t, 16> CaseCounts;
296  uint64_t MaxCount = 0;
297  unsigned Version = 0;
298  int64_t LastV = -1;
299  // Default case is in the front -- save the slot here.
300  CaseCounts.push_back(0);
302  for (auto I = VDs.begin(), E = VDs.end(); I != E; ++I) {
303  auto &VD = *I;
304  int64_t V = VD.Value;
305  uint64_t C = VD.Count;
306  if (MemOPScaleCount)
307  C = getScaledCount(C, ActualCount, SavedTotalCount);
308 
309  if (!InstrProfIsSingleValRange(V) || V > MemOpMaxOptSize) {
310  RemainingVDs.push_back(VD);
311  continue;
312  }
313 
314  // ValueCounts are sorted on the count. Break at the first un-profitable
315  // value.
316  if (!isProfitable(C, RemainCount)) {
317  RemainingVDs.insert(RemainingVDs.end(), I, E);
318  break;
319  }
320 
321  if (V == LastV) {
322  LLVM_DEBUG(dbgs() << "Invalid Profile Data in Function " << Func.getName()
323  << ": Two consecutive, identical values in MemOp value"
324  "counts.\n");
325  return false;
326  }
327 
328  LastV = V;
329 
330  SizeIds.push_back(V);
331  CaseCounts.push_back(C);
332  if (C > MaxCount)
333  MaxCount = C;
334 
335  assert(RemainCount >= C);
336  RemainCount -= C;
337  assert(SavedRemainCount >= VD.Count);
338  SavedRemainCount -= VD.Count;
339 
340  if (++Version >= MemOPMaxVersion && MemOPMaxVersion != 0) {
341  RemainingVDs.insert(RemainingVDs.end(), I + 1, E);
342  break;
343  }
344  }
345 
346  if (Version == 0)
347  return false;
348 
349  CaseCounts[0] = RemainCount;
350  if (RemainCount > MaxCount)
351  MaxCount = RemainCount;
352 
353  uint64_t SumForOpt = TotalCount - RemainCount;
354 
355  LLVM_DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
356  << " Versions (covering " << SumForOpt << " out of "
357  << TotalCount << ")\n");
358 
359  // mem_op(..., size)
360  // ==>
361  // switch (size) {
362  // case s1:
363  // mem_op(..., s1);
364  // goto merge_bb;
365  // case s2:
366  // mem_op(..., s2);
367  // goto merge_bb;
368  // ...
369  // default:
370  // mem_op(..., size);
371  // goto merge_bb;
372  // }
373  // merge_bb:
374 
375  BasicBlock *BB = MO.I->getParent();
376  LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
377  LLVM_DEBUG(dbgs() << *BB << "\n");
378  auto OrigBBFreq = BFI.getBlockFreq(BB);
379 
380  BasicBlock *DefaultBB = SplitBlock(BB, MO.I, DT);
381  BasicBlock::iterator It(*MO.I);
382  ++It;
383  assert(It != DefaultBB->end());
384  BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
385  MergeBB->setName("MemOP.Merge");
386  BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
387  DefaultBB->setName("MemOP.Default");
388 
390  auto &Ctx = Func.getContext();
391  IRBuilder<> IRB(BB);
392  BB->getTerminator()->eraseFromParent();
393  Value *SizeVar = MO.getLength();
394  SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
395  Type *MemOpTy = MO.I->getType();
396  PHINode *PHI = nullptr;
397  if (!MemOpTy->isVoidTy()) {
398  // Insert a phi for the return values at the merge block.
399  IRBuilder<> IRBM(MergeBB->getFirstNonPHI());
400  PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1, "MemOP.RVMerge");
401  MO.I->replaceAllUsesWith(PHI);
402  PHI->addIncoming(MO.I, DefaultBB);
403  }
404 
405  // Clear the value profile data.
406  MO.I->setMetadata(LLVMContext::MD_prof, nullptr);
407  // If all promoted, we don't need the MD.prof metadata.
408  if (SavedRemainCount > 0 || Version != NumVals) {
409  // Otherwise we need update with the un-promoted records back.
410  ArrayRef<InstrProfValueData> RemVDs(RemainingVDs);
411  annotateValueSite(*Func.getParent(), *MO.I, RemVDs, SavedRemainCount,
412  IPVK_MemOPSize, NumVals);
413  }
414 
415  LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
416 
417  std::vector<DominatorTree::UpdateType> Updates;
418  if (DT)
419  Updates.reserve(2 * SizeIds.size());
420 
421  for (uint64_t SizeId : SizeIds) {
422  BasicBlock *CaseBB = BasicBlock::Create(
423  Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
424  MemOp NewMO = MO.clone();
425  // Fix the argument.
426  auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType());
427  assert(SizeType && "Expected integer type size argument.");
428  ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
429  NewMO.setLength(CaseSizeId);
430  CaseBB->getInstList().push_back(NewMO.I);
431  IRBuilder<> IRBCase(CaseBB);
432  IRBCase.CreateBr(MergeBB);
433  SI->addCase(CaseSizeId, CaseBB);
434  if (!MemOpTy->isVoidTy())
435  PHI->addIncoming(NewMO.I, CaseBB);
436  if (DT) {
437  Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
438  Updates.push_back({DominatorTree::Insert, BB, CaseBB});
439  }
440  LLVM_DEBUG(dbgs() << *CaseBB << "\n");
441  }
442  DTU.applyUpdates(Updates);
443  Updates.clear();
444 
445  setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
446 
447  LLVM_DEBUG(dbgs() << *BB << "\n");
448  LLVM_DEBUG(dbgs() << *DefaultBB << "\n");
449  LLVM_DEBUG(dbgs() << *MergeBB << "\n");
450 
451  ORE.emit([&]() {
452  using namespace ore;
453  return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MO.I)
454  << "optimized " << NV("Memop", MO.getName(TLI)) << " with count "
455  << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount)
456  << " for " << NV("Versions", Version) << " versions";
457  });
458 
459  return true;
460 }
461 } // namespace
462 
465  DominatorTree *DT, TargetLibraryInfo &TLI) {
466  if (DisableMemOPOPT)
467  return false;
468 
469  if (F.hasFnAttribute(Attribute::OptimizeForSize))
470  return false;
471  MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT, TLI);
472  MemOPSizeOpt.perform();
473  return MemOPSizeOpt.isChanged();
474 }
475 
481  auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
482  bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
483  if (!Changed)
484  return PreservedAnalyses::all();
485  auto PA = PreservedAnalyses();
486  PA.preserve<DominatorTreeAnalysis>();
487  return PA;
488 }
llvm::SaturatingMultiply
std::enable_if_t< std::is_unsigned< T >::value, T > SaturatingMultiply(T X, T Y, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, of type T.
Definition: MathExtras.h:847
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:299
getName
static StringRef getName(Value *V)
Definition: ProvenanceAnalysisEvaluator.cpp:42
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
MathExtras.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
llvm::Function
Definition: Function.h:60
StringRef.h
MemOpMaxOptSize
static cl::opt< unsigned > MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), cl::desc("Optimize the memop size <= this value"))
llvm::SmallVector< uint64_t, 16 >
Statistic.h
ErrorHandling.h
llvm::IRBuilder<>
DomTreeUpdater.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::MemOp
Definition: TargetLowering.h:111
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::DominatorTreeBase< BasicBlock, false >::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
llvm::MemIntrinsic
This is the common base class for memset/memcpy/memmove.
Definition: IntrinsicInst.h:962
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
DEBUG_TYPE
#define DEBUG_TYPE
Definition: PGOMemOPSizeOpt.cpp:52
MemOPCountThreshold
static cl::opt< unsigned > MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(1000), cl::desc("The minimum count to optimize memory " "intrinsic calls"))
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
InstrProfData.inc
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
PGOMemOPSizeOptImpl
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, DominatorTree *DT, TargetLibraryInfo &TLI)
Definition: PGOMemOPSizeOpt.cpp:463
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:35
MemOPPercentThreshold
static cl::opt< unsigned > MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), cl::Hidden, cl::ZeroOrMore, cl::desc("The percentage threshold for the " "memory intrinsic calls optimization"))
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
Twine.h
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:141
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:294
llvm::Instruction
Definition: Instruction.h:42
InstrProf.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:372
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:919
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:209
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
Type.h
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:116
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1392
uint64_t
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2814
MemOPScaleCount
static cl::opt< bool > MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, cl::desc("Scale the memop size counts using the basic " " block count value"))
PGOInstrumentation.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
ArrayRef.h
IRBuilder.h
llvm::PGOMemOPSizeOpt::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: PGOMemOPSizeOpt.cpp:476
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
SI
StandardInstrumentations SI(Debug, VerifyEach)
MemOPMaxVersion
static cl::opt< unsigned > MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("The max version for the optimized memory " " intrinsic calls"))
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
InstVisitor.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:97
uint32_t
llvm::InstVisitor
Base class for instruction visitors.
Definition: InstVisitor.h:78
llvm::DomTreeUpdater::UpdateStrategy::Eager
@ Eager
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:994
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
DisableMemOPOPT
static cl::opt< bool > DisableMemOPOPT("disable-memop-opt", cl::init(false), cl::Hidden, cl::desc("Disable optimize"))
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:799
Casting.h
Function.h
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:364
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
Instructions.h
Version
uint64_t Version
Definition: RawMemProfReader.cpp:41
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::PHINode
Definition: Instructions.h:2664
llvm::setProfMetadata
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
Definition: PGOInstrumentation.cpp:1953
DerivedTypes.h
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getValueProfDataFromInst
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
Definition: InstrProf.cpp:1039
LLVMContext.h
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3243
llvm::cl::desc
Definition: CommandLine.h:405
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:837
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
MemOPOptMemcmpBcmp
cl::opt< bool > MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), cl::Hidden, cl::desc("Size-specialize memcmp and bcmp calls"))
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:443
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:792