Go to the documentation of this file.
31 #include <unordered_set>
35 #define DEBUG_TYPE "sample-profile-probe"
38 "Number of probes that have an artificial debug line");
42 cl::desc(
"Do pseudo probe verification"));
46 cl::desc(
"The option to specify the name of the functions to verify."));
50 cl::desc(
"Update pseudo probe distribution factor"));
54 const DILocation *InlinedAt = DIL ? DIL->getInlinedAt() :
nullptr;
58 const DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
60 auto Name = SP->getLinkageName();
64 InlinedAt = InlinedAt->getInlinedAt();
73 bool PseudoProbeVerifier::shouldVerifyFunction(
const Function *
F) {
75 if (
F->isDeclaration())
79 if (
F->hasAvailableExternallyLinkage())
82 static std::unordered_set<std::string> VerifyFuncNames(
84 return VerifyFuncNames.empty() || VerifyFuncNames.count(
F->getName().str());
99 "\n*** Pseudo Probe Verification After " + PassID.
str() +
" ***\n";
101 if (any_isa<const Module *>(
IR))
103 else if (any_isa<const Function *>(
IR))
105 else if (any_isa<const LazyCallGraph::SCC *>(
IR))
107 else if (any_isa<const Loop *>(
IR))
124 if (!shouldVerifyFunction(
F))
127 for (
const auto &
BB : *
F)
128 collectProbeFactors(&
BB, ProbeFactors);
129 verifyProbeFactors(
F, ProbeFactors);
137 void PseudoProbeVerifier::collectProbeFactors(
const BasicBlock *Block,
139 for (
const auto &
I : *Block) {
142 ProbeFactors[{Probe->Id, Hash}] += Probe->Factor;
147 void PseudoProbeVerifier::verifyProbeFactors(
149 bool BannerPrinted =
false;
150 auto &PrevProbeFactors = FunctionProbeFactors[
F->getName()];
151 for (
const auto &
I : ProbeFactors) {
152 float CurProbeFactor =
I.second;
153 if (PrevProbeFactors.count(
I.first)) {
154 float PrevProbeFactor = PrevProbeFactors[
I.first];
155 if (
std::abs(CurProbeFactor - PrevProbeFactor) >
156 DistributionFactorVariance) {
157 if (!BannerPrinted) {
158 dbgs() <<
"Function " <<
F->getName() <<
":\n";
159 BannerPrinted =
true;
161 dbgs() <<
"Probe " <<
I.first.first <<
"\tprevious factor "
162 <<
format(
"%0.2f", PrevProbeFactor) <<
"\tcurrent factor "
163 <<
format(
"%0.2f", CurProbeFactor) <<
"\n";
168 PrevProbeFactors[
I.first] =
I.second;
174 for (
const auto *Operand : FuncInfo->operands()) {
175 const auto *MD = cast<MDNode>(Operand);
177 mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
179 mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
186 PseudoProbeManager::getDesc(
const Function &
F)
const {
187 auto I = GUIDToProbeDescMap.find(
189 return I == GUIDToProbeDescMap.end() ? nullptr : &
I->second;
198 const auto *Desc = getDesc(
F);
200 LLVM_DEBUG(
dbgs() <<
"Probe descriptor missing for Function " <<
F.getName()
214 const std::string &CurModuleUniqueId)
215 :
F(&Func), CurModuleUniqueId(CurModuleUniqueId) {
216 BlockProbeIds.clear();
217 CallProbeIds.clear();
219 computeProbeIdForBlocks();
220 computeProbeIdForCallsites();
228 void SampleProfileProber::computeCFGHash() {
229 std::vector<uint8_t> Indexes;
231 for (
auto &
BB : *F) {
232 auto *TI =
BB.getTerminator();
233 for (
unsigned I = 0,
E = TI->getNumSuccessors();
I !=
E; ++
I) {
234 auto *Succ = TI->getSuccessor(
I);
235 auto Index = getBlockId(Succ);
236 for (
int J = 0; J < 4; J++)
237 Indexes.push_back((uint8_t)(
Index >> (J * 8)));
243 FunctionHash = (
uint64_t)CallProbeIds.size() << 48 |
246 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
247 assert(FunctionHash &&
"Function checksum should not be zero");
248 LLVM_DEBUG(
dbgs() <<
"\nFunction Hash Computation for " <<
F->getName()
250 <<
" CRC = " << JC.
getCRC() <<
", Edges = "
251 << Indexes.size() <<
", ICSites = " << CallProbeIds.size()
252 <<
", Hash = " << FunctionHash <<
"\n");
255 void SampleProfileProber::computeProbeIdForBlocks() {
256 for (
auto &
BB : *F) {
257 BlockProbeIds[&
BB] = ++LastProbeId;
261 void SampleProfileProber::computeProbeIdForCallsites() {
262 for (
auto &
BB : *F) {
264 if (!isa<CallBase>(
I))
266 if (isa<IntrinsicInst>(&
I))
268 CallProbeIds[&
I] = ++LastProbeId;
275 return I == BlockProbeIds.end() ? 0 :
I->second;
279 auto Iter = CallProbeIds.find(
const_cast<Instruction *
>(Call));
280 return Iter == CallProbeIds.end() ? 0 : Iter->second;
296 assert((isa<PseudoProbeInst>(
I) || isa<CallBase>(
I)) &&
297 "Expecting pseudo probe or call instructions");
298 if (!
I->getDebugLoc()) {
299 if (
auto *SP =
F.getSubprogram()) {
304 dbgs() <<
"\nIn Function " <<
F.getName()
305 <<
" Probe gets an artificial debug line\n";
313 for (
auto &
I : BlockProbeIds) {
323 return !isa<PHINode>(J) && !isa<DbgInfoIntrinsic>(J) &&
324 !J->isLifetimeStartOrEnd() && J->getDebugLoc();
328 while (J !=
BB->getTerminator() && !HasValidDbgLine(J)) {
334 "Cannot get the probing point");
341 AssignDebugLoc(Probe);
347 for (
auto &
I : CallProbeIds) {
348 auto *Call =
I.first;
350 uint32_t Type = cast<CallBase>(Call)->getCalledFunction()
353 AssignDebugLoc(Call);
359 if (
auto DIL = Call->getDebugLoc()) {
360 DIL = DIL->cloneWithDiscriminator(V);
361 Call->setDebugLoc(DIL);
370 auto Hash = getFunctionHash();
373 assert(NMD &&
"llvm.pseudo_probe_desc should be pre-created");
386 if (!
F.isDeclarationForLinker()) {
388 auto Triple =
TM->getTargetTriple();
404 if (
F.isDeclaration())
413 void PseudoProbeUpdatePass::runOnFunction(
Function &
F,
417 return BFI.getBlockProfileCount(
BB).value_or(0);
422 for (
auto &Block :
F) {
423 for (
auto &
I : Block) {
426 ProbeFactors[{Probe->Id, Hash}] += BBProfileCount(&Block);
432 for (
auto &Block :
F) {
433 for (
auto &
I : Block) {
436 float Sum = ProbeFactors[{Probe->Id, Hash}];
448 if (
F.isDeclaration())
452 runOnFunction(
F,
FAM);
A set of analyses that are preserved following a run of a transformation pass.
constexpr static uint64_t PseudoProbeFullDistributionFactor
This is an optimization pass for GlobalISel generic memory operations.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
constexpr static uint8_t FullDistributionFactor
void update(ArrayRef< uint8_t > Data)
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
const Function * getParent() const
Return the enclosing method, or null if none.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represents a single loop in the control flow graph.
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
MDNode * createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, Function *F)
Return metadata containing the pseudo probe descriptor for a function.
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Triple - Helper class for working with autoconf configuration names.
FunctionAnalysisManager FAM
StringRef getName() const
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
The instances of the Type class are immutable: once they are created, they are never changed.
PassInstrumentationCallbacks PIC
Sample profile pseudo prober.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static uint32_t packProbeData(uint32_t Index, uint32_t Type, uint32_t Flags, uint32_t Factor)
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequencies.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An SCC of the call graph.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
(vector float) vec_cmpeq(*A, *B) C
constexpr const char * PseudoProbeDescMetadataName
bool supportsCOMDAT() const
Tests whether the target supports comdat.
STATISTIC(NumFunctions, "Total number of functions")
void setProbeDistributionFactor(Instruction &Inst, float Factor)
Statically lint checks LLVM IR
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Analysis pass which computes BlockFrequencyInfo.
void runAfterPass(StringRef PassID, Any IR)
static uint64_t getCallStackHash(const DILocation *DIL)
Representation of the samples collected for a function.
initializer< Ty > init(const Ty &Val)
uint64_t getFunctionHash() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Primary interface to the complete machine description for the target machine.
A Module instance is used to store all the information related to an LLVM module.
A node in the call graph.
SampleProfileProber(Function &F, const std::string &CurModuleUniqueId)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
PseudoProbeManager(const Module &M)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
void registerCallbacks(PassInstrumentationCallbacks &PIC)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e. returned by getGlobalIdentifier()).
Comdat * getOrCreateFunctionComdat(Function &F, Triple &T)
BlockT * getHeader() const
std::unordered_map< std::pair< uint64_t, uint64_t >, float, pair_hash< uint64_t, uint64_t > > ProbeFactorMap
std::string getUniqueModuleId(Module *M)
Produce a unique identifier for this module by taking the MD5 sum of the names of the module's strong external symbols that are not comdat members.
static cl::list< std::string > VerifyPseudoProbeFuncList("verify-pseudo-probe-funcs", cl::Hidden, cl::desc("The option to specify the name of the functions to verify."))
Optional< PseudoProbe > extractProbe(const Instruction &Inst)
void registerAfterPassCallback(CallableT C)
static uint64_t computeCallStackHash(const Instruction &Inst)
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
void instrumentOneFunc(Function &F, TargetMachine *TM)
This class manages callbacks registration, as well as provides a way for PassInstrumentation to pass control to the registered callbacks.
std::string to_string(const T &Value)
A container for analyses that lazily runs them and caches their results.
const char LLVMTargetMachineRef TM
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR unit.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static cl::opt< bool > UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden, cl::desc("Update pseudo probe distribution factor"))
APFloat abs(APFloat X)
Returns the absolute value of the argument.
static cl::opt< bool > VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden, cl::desc("Do pseudo probe verification"))
LLVM Value Representation.
bool moduleIsProbed(const Module &M) const
uint64_t MD5Hash(StringRef Str)
Helper to compute and return lower 64 bits of the given string's MD5 hash.