LLVM 23.0.0git
InstrProfiling.cpp
Go to the documentation of this file.
1//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass lowers instrprof_* intrinsics emitted by an instrumentor.
10// It also builds the data structures and initialization code needed for
11// updating execution counts and emitting the profile at runtime.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/ADT/Twine.h"
23#include "llvm/Analysis/CFG.h"
27#include "llvm/IR/Attributes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/CFG.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DIBuilder.h"
35#include "llvm/IR/Dominators.h"
36#include "llvm/IR/Function.h"
37#include "llvm/IR/GlobalAlias.h"
38#include "llvm/IR/GlobalValue.h"
40#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Intrinsics.h"
46#include "llvm/IR/MDBuilder.h"
47#include "llvm/IR/Module.h"
49#include "llvm/IR/Type.h"
50#include "llvm/Pass.h"
56#include "llvm/Support/Error.h"
64#include <algorithm>
65#include <cassert>
66#include <cstdint>
67#include <string>
68
69using namespace llvm;
70
71#define DEBUG_TYPE "instrprof"
72
73namespace llvm {
74// Command line option to enable vtable value profiling. Defined in
75// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
78 "profile-correlate",
79 cl::desc("Use debug info or binary file to correlate profiles."),
82 "No profile correlation"),
84 "Use debug info to correlate"),
86 "Use binary to correlate")));
87} // namespace llvm
88
89namespace {
90
91cl::opt<bool> DoHashBasedCounterSplit(
92 "hash-based-counter-split",
93 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
94 cl::init(true));
95
97 RuntimeCounterRelocation("runtime-counter-relocation",
98 cl::desc("Enable relocating counters at runtime."),
99 cl::init(false));
100
101cl::opt<bool> ValueProfileStaticAlloc(
102 "vp-static-alloc",
103 cl::desc("Do static counter allocation for value profiler"),
104 cl::init(true));
105
106cl::opt<double> NumCountersPerValueSite(
107 "vp-counters-per-site",
108 cl::desc("The average number of profile counters allocated "
109 "per value profiling site."),
110 // This is set to a very small value because in real programs, only
111 // a very small percentage of value sites have non-zero targets, e.g, 1/30.
112 // For those sites with non-zero profile, the average number of targets
113 // is usually smaller than 2.
114 cl::init(1.0));
115
116cl::opt<bool> AtomicCounterUpdateAll(
117 "instrprof-atomic-counter-update-all",
118 cl::desc("Make all profile counter updates atomic (for testing only)"),
119 cl::init(false));
120
121cl::opt<bool> VerifyAtomicPromotion(
122 "verify-atomic-counter-promoted",
123 cl::desc("Check that all profile counter updates were made atomic; no-op "
124 "if atomic updates are not requested (-fprofile-update=atomic)"),
125 cl::init(false));
126
127cl::opt<bool> AtomicCounterUpdatePromoted(
128 "atomic-counter-update-promoted",
129 cl::desc("Do counter update using atomic fetch add "
130 " for promoted counters only"),
131 cl::init(false));
132
133cl::opt<bool> AtomicFirstCounter(
134 "atomic-first-counter",
135 cl::desc("Use atomic fetch add for first counter in a function (usually "
136 "the entry counter)"),
137 cl::init(false));
138
139cl::opt<bool> ConditionalCounterUpdate(
140 "conditional-counter-update",
141 cl::desc("Do conditional counter updates in single byte counters mode)"),
142 cl::init(false));
143
144// If the option is not specified, the default behavior about whether
145// counter promotion is done depends on how instrumentation lowering
146// pipeline is setup, i.e., the default value of true of this option
147// does not mean the promotion will be done by default. Explicitly
148// setting this option can override the default behavior.
149cl::opt<bool> DoCounterPromotion("do-counter-promotion",
150 cl::desc("Do counter register promotion"),
151 cl::init(false));
152cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
153 "max-counter-promotions-per-loop", cl::init(20),
154 cl::desc("Max number counter promotions per loop to avoid"
155 " increasing register pressure too much"));
156
157// A debug option
159 MaxNumOfPromotions("max-counter-promotions", cl::init(-1),
160 cl::desc("Max number of allowed counter promotions"));
161
162cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
163 "speculative-counter-promotion-max-exiting", cl::init(3),
164 cl::desc("The max number of exiting blocks of a loop to allow "
165 " speculative counter promotion"));
166
167cl::opt<bool> SpeculativeCounterPromotionToLoop(
168 "speculative-counter-promotion-to-loop",
169 cl::desc("When the option is false, if the target block is in a loop, "
170 "the promotion will be disallowed unless the promoted counter "
171 " update can be further/iteratively promoted into an acyclic "
172 " region."));
173
174cl::opt<bool> IterativeCounterPromotion(
175 "iterative-counter-promotion", cl::init(true),
176 cl::desc("Allow counter promotion across the whole loop nest."));
177
178cl::opt<bool> SkipRetExitBlock(
179 "skip-ret-exit-block", cl::init(true),
180 cl::desc("Suppress counter promotion if exit blocks contain ret."));
181
182static cl::opt<bool> SampledInstr("sampled-instrumentation",
183 cl::desc("Do PGO instrumentation sampling"));
184
185static cl::opt<unsigned> SampledInstrPeriod(
186 "sampled-instr-period",
187 cl::desc("Set the profile instrumentation sample period. A sample period "
188 "of 0 is invalid. For each sample period, a fixed number of "
189 "consecutive samples will be recorded. The number is controlled "
190 "by 'sampled-instr-burst-duration' flag. The default sample "
191 "period of 65536 is optimized for generating efficient code that "
192 "leverages unsigned short integer wrapping in overflow, but this "
193 "is disabled under simple sampling (burst duration = 1)."),
194 cl::init(USHRT_MAX + 1));
195
196static cl::opt<unsigned> SampledInstrBurstDuration(
197 "sampled-instr-burst-duration",
198 cl::desc("Set the profile instrumentation burst duration, which can range "
199 "from 1 to the value of 'sampled-instr-period' (0 is invalid). "
200 "This number of samples will be recorded for each "
201 "'sampled-instr-period' count update. Setting to 1 enables simple "
202 "sampling, in which case it is recommended to set "
203 "'sampled-instr-period' to a prime number."),
204 cl::init(200));
205
/// Resolved sampling parameters, derived from the sampling command-line
/// options by getSampledInstrumentationConfig().
///
/// Every member has a default initializer so that a default-constructed
/// object never carries indeterminate values.
struct SampledInstrumentationConfig {
  /// Number of consecutive counter updates recorded per period.
  unsigned BurstDuration = 0;
  /// Length of one sampling period, in counter updates.
  unsigned Period = 0;
  /// True when the sampling variable is accessed as a 16-bit integer rather
  /// than a 32-bit one.
  bool UseShort = false;
  /// True when BurstDuration == 1.
  bool IsSimpleSampling = false;
  /// True for burst sampling with a period of USHRT_MAX + 1, where the
  /// explicit period check is omitted.
  bool IsFastSampling = false;
};
213
214static SampledInstrumentationConfig getSampledInstrumentationConfig() {
215 SampledInstrumentationConfig config;
216 config.BurstDuration = SampledInstrBurstDuration.getValue();
217 config.Period = SampledInstrPeriod.getValue();
218 if (config.BurstDuration > config.Period)
220 "SampledBurstDuration must be less than or equal to SampledPeriod");
221 if (config.Period == 0 || config.BurstDuration == 0)
223 "SampledPeriod and SampledBurstDuration must be greater than 0");
224 config.IsSimpleSampling = (config.BurstDuration == 1);
225 // If (BurstDuration == 1 && Period == 65536), generate the simple sampling
226 // style code.
227 config.IsFastSampling =
228 (!config.IsSimpleSampling && config.Period == USHRT_MAX + 1);
229 config.UseShort = (config.Period <= USHRT_MAX) || config.IsFastSampling;
230 return config;
231}
232
233using LoadStorePair = std::pair<Instruction *, Instruction *>;
234
235static void makeAtomic(Instruction *Load, Instruction *Store) {
236 auto *Addition = dyn_cast<BinaryOperator>(Store->getOperand(0));
237 assert(Addition && Addition->getOpcode() == Instruction::BinaryOps::Add);
238 auto *Addend = Addition->getOperand(1);
239
240 IRBuilder<> Builder(Load);
241 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Store->getOperand(1), Addend,
243 Store->eraseFromParent();
244 Addition->eraseFromParent();
245 Load->eraseFromParent();
246}
247
248static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
249 auto *MD = dyn_cast_or_null<ConstantAsMetadata>(M.getModuleFlag(Flag));
250 if (!MD)
251 return 0;
252
253 // If the flag is a ConstantAsMetadata, it should be an integer representable
254 // in 64-bits.
255 return cast<ConstantInt>(MD->getValue())->getZExtValue();
256}
257
258static bool enablesValueProfiling(const Module &M) {
259 return isIRPGOFlagSet(&M) ||
260 getIntModuleFlagOrZero(M, "EnableValueProfiling") != 0;
261}
262
263// Conservatively returns true if value profiling is enabled.
264static bool profDataReferencedByCode(const Module &M) {
265 return enablesValueProfiling(M);
266}
267
268class InstrLowerer final {
269public:
270 InstrLowerer(Module &M, const InstrProfOptions &Options,
271 std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
272 bool IsCS)
273 : M(M), Options(Options), TT(M.getTargetTriple()), IsCS(IsCS),
274 GetTLI(GetTLI), DataReferencedByCode(profDataReferencedByCode(M)) {}
275
276 bool lower();
277
278private:
279 Module &M;
280 const InstrProfOptions Options;
281 const Triple TT;
282 // Is this lowering for the context-sensitive instrumentation.
283 const bool IsCS;
284
285 std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
286
287 const bool DataReferencedByCode;
288
289 struct PerFunctionProfileData {
290 uint32_t NumValueSites[IPVK_Last + 1] = {};
291 GlobalVariable *RegionCounters = nullptr;
292 GlobalVariable *DataVar = nullptr;
293 GlobalVariable *RegionBitmaps = nullptr;
294 uint32_t NumBitmapBytes = 0;
295
296 PerFunctionProfileData() = default;
297 };
298 DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
299 // Key is virtual table variable, value is 'VTableProfData' in the form of
300 // GlobalVariable.
301 DenseMap<GlobalVariable *, GlobalVariable *> VTableDataMap;
302 /// If runtime relocation is enabled, this maps functions to the load
303 /// instruction that produces the profile relocation bias.
304 DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
305 std::vector<GlobalValue *> CompilerUsedVars;
306 std::vector<GlobalValue *> UsedVars;
307 std::vector<GlobalVariable *> ReferencedNames;
308 // The list of virtual table variables of which the VTableProfData is
309 // collected.
310 std::vector<GlobalVariable *> ReferencedVTables;
311 GlobalVariable *NamesVar = nullptr;
312 size_t NamesSize = 0;
313
314 StructType *ProfileDataTy = nullptr;
315
316 // vector of counter load/store pairs to be register promoted.
317 std::vector<LoadStorePair> PromotionCandidates;
318
319 int64_t TotalCountersPromoted = 0;
320
321 /// Lower instrumentation intrinsics in the function. Returns true if there
322 /// any lowering.
323 bool lowerIntrinsics(Function *F);
324
325 /// Register-promote counter loads and stores in loops.
326 void promoteCounterLoadStores(Function *F);
327
328 /// Returns true if relocating counters at runtime is enabled.
329 bool isRuntimeCounterRelocationEnabled() const;
330
331 /// Returns true if profile counter update register promotion is enabled.
332 bool isCounterPromotionEnabled() const;
333
334 /// Returns true if profile counter updates should be atomic.
335 bool isAtomic() const;
336
337 /// Return true if profile sampling is enabled.
338 bool isSamplingEnabled() const;
339
340 /// Count the number of instrumented value sites for the function.
341 void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
342
343 /// Replace instrprof.value.profile with a call to runtime library.
344 void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
345
346 /// Replace instrprof.cover with a store instruction to the coverage byte.
347 void lowerCover(InstrProfCoverInst *Inc);
348
349 /// Replace instrprof.timestamp with a call to
350 /// INSTR_PROF_PROFILE_SET_TIMESTAMP.
351 void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);
352
353 /// Replace instrprof.increment with an increment of the appropriate value.
354 void lowerIncrement(InstrProfIncrementInst *Inc);
355
356 /// Force emitting of name vars for unused functions.
357 void lowerCoverageData(GlobalVariable *CoverageNamesVar);
358
359 /// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction
360 /// using the index represented by the a temp value into a bitmap.
361 void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);
362
363 /// Get the Bias value for data to access mmap-ed area.
364 /// Create it if it hasn't been seen.
365 GlobalVariable *getOrCreateBiasVar(StringRef VarName);
366
367 /// Compute the address of the counter value that this profiling instruction
368 /// acts on.
369 Value *getCounterAddress(InstrProfCntrInstBase *I);
370
371 /// Lower the incremental instructions under profile sampling predicates.
372 void doSampling(Instruction *I);
373
374 /// Get the region counters for an increment, creating them if necessary.
375 ///
376 /// If the counter array doesn't yet exist, the profile data variables
377 /// referring to them will also be created.
378 GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc);
379
380 /// Create the region counters.
381 GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc,
382 StringRef Name,
384
385 /// Compute the address of the test vector bitmap that this profiling
386 /// instruction acts on.
387 Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);
388
389 /// Get the region bitmaps for an increment, creating them if necessary.
390 ///
391 /// If the bitmap array doesn't yet exist, the profile data variables
392 /// referring to them will also be created.
393 GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc);
394
395 /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with
396 /// an MC/DC Decision region. The number of bytes required is indicated by
397 /// the intrinsic used (type InstrProfMCDCBitmapInstBase). This is called
398 /// as part of setupProfileSection() and is conceptually very similar to
399 /// what is done for profile data counters in createRegionCounters().
400 GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
401 StringRef Name,
403
404 /// Set Comdat property of GV, if required.
405 void maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef VarName);
406
407 /// Setup the sections into which counters and bitmaps are allocated.
408 GlobalVariable *setupProfileSection(InstrProfInstBase *Inc,
409 InstrProfSectKind IPSK);
410
411 /// Create INSTR_PROF_DATA variable for counters and bitmaps.
412 void createDataVariable(InstrProfCntrInstBase *Inc);
413
414 /// Get the counters for virtual table values, creating them if necessary.
415 void getOrCreateVTableProfData(GlobalVariable *GV);
416
417 /// Emit the section with compressed function names.
418 void emitNameData();
419
420 /// Emit the section with compressed vtable names.
421 void emitVTableNames();
422
423 /// Emit value nodes section for value profiling.
424 void emitVNodes();
425
426 /// Emit runtime registration functions for each profile data variable.
427 void emitRegistration();
428
429 /// Emit the necessary plumbing to pull in the runtime initialization.
430 /// Returns true if a change was made.
431 bool emitRuntimeHook();
432
433 /// Add uses of our data variables and runtime hook.
434 void emitUses();
435
436 /// Create a static initializer for our data, on platforms that need it,
437 /// and for any profile output file that was specified.
438 void emitInitialization();
439
440 /// Return the __llvm_profile_data struct type.
441 StructType *getProfileDataTy();
442};
443
444///
445/// A helper class to promote one counter RMW operation in the loop
446/// into register update.
447///
448/// RWM update for the counter will be sinked out of the loop after
449/// the transformation.
450///
451class PGOCounterPromoterHelper : public LoadAndStorePromoter {
452public:
453 PGOCounterPromoterHelper(
454 Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
455 BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
456 ArrayRef<Instruction *> InsertPts,
457 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
458 LoopInfo &LI, bool IsAtomic)
459 : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
460 InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI),
461 IsAtomic(IsAtomic) {
464 SSA.AddAvailableValue(PH, Init);
465 }
466
467 void doExtraRewritesBeforeFinalDeletion() override {
468 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
469 BasicBlock *ExitBlock = ExitBlocks[i];
470 Instruction *InsertPos = InsertPts[i];
471 // Get LiveIn value into the ExitBlock. If there are multiple
472 // predecessors, the value is defined by a PHI node in this
473 // block.
474 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
475 Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
476 Type *Ty = LiveInValue->getType();
477 IRBuilder<> Builder(InsertPos);
478 if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Addr)) {
479 // If isRuntimeCounterRelocationEnabled() is true then the address of
480 // the store instruction is computed with two instructions in
481 // InstrProfiling::getCounterAddress(). We need to copy those
482 // instructions to this block to compute Addr correctly.
483 // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias>
484 // %Addr = inttoptr i64 %BiasAdd to i64*
485 auto *OrigBiasInst = dyn_cast<BinaryOperator>(AddrInst->getOperand(0));
486 assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);
487 Value *BiasInst = Builder.Insert(OrigBiasInst->clone());
488 Addr = Builder.CreateIntToPtr(BiasInst,
489 PointerType::getUnqual(Ty->getContext()));
490 }
491 auto *TargetLoop =
492 IterativeCounterPromotion ? LI.getLoopFor(ExitBlock) : nullptr;
493 // Generate the relaxed atomic RMW if we've asked for it and no more
494 // promotion is possible.
495 if ((IsAtomic && !TargetLoop) || AtomicCounterUpdatePromoted)
496 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
497 MaybeAlign(), AtomicOrdering::Monotonic);
498 else {
499 LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
500 auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
501 auto *NewStore = Builder.CreateStore(NewVal, Addr);
502
503 // Now update the parent loop's candidate list:
504 if (TargetLoop)
505 LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
506 }
507 }
508 }
509
510private:
511 Instruction *Store;
512 ArrayRef<BasicBlock *> ExitBlocks;
513 ArrayRef<Instruction *> InsertPts;
514 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
515 LoopInfo &LI;
516 const bool IsAtomic;
517};
518
519/// A helper class to do register promotion for all profile counter
520/// updates in a loop.
521///
522class PGOCounterPromoter {
523public:
524 PGOCounterPromoter(
525 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
526 Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI, bool IsAtomic)
527 : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI),
528 IsAtomic(IsAtomic) {
529
530 // Skip collection of ExitBlocks and InsertPts for loops that will not be
531 // able to have counters promoted.
532 SmallVector<BasicBlock *, 8> LoopExitBlocks;
533 SmallPtrSet<BasicBlock *, 8> BlockSet;
534
535 L.getExitBlocks(LoopExitBlocks);
536 if (!isPromotionPossible(&L, LoopExitBlocks))
537 return;
538
539 for (BasicBlock *ExitBlock : LoopExitBlocks) {
540 if (BlockSet.insert(ExitBlock).second &&
541 llvm::none_of(predecessors(ExitBlock), [&](const BasicBlock *Pred) {
542 return llvm::isPresplitCoroSuspendExitEdge(*Pred, *ExitBlock);
543 })) {
544 ExitBlocks.push_back(ExitBlock);
545 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
546 }
547 }
548 }
549
550 bool run(int64_t *NumPromoted) {
551 bool RC = promoteCandidates(NumPromoted);
552 // In certain case, e.g. with -fprofile-update=atomic, we want to generate
553 // atomic updates of the PGO counters, but also perform promotion of these
554 // updates out of loops to reduce train time. The strategy is:
555 // 1) generate non-atomic load-increment-store sequence of instructions
556 // during lowerIntrinsics phase,
557 // 2) perform the promotion (in promoteCandidates function), then
558 // 3) convert all (promoted and unpromotable) updates to atomicRMW.
559 // This requires that promoted candidates are set to nullptr in the
560 // LoopToCandidates[&L] array by the promoteCandidates() function.
561 if (IsAtomic)
562 for (auto &Cand : LoopToCandidates[&L])
563 if (Cand.first != nullptr && Cand.second != nullptr)
564 makeAtomic(Cand.first, Cand.second);
565 return RC;
566 }
567
568private:
569 bool promoteCandidates(int64_t *NumPromoted) {
570 // Skip 'infinite' loops:
571 if (ExitBlocks.size() == 0)
572 return false;
573
574 // Skip if any of the ExitBlocks contains a ret instruction.
575 // This is to prevent dumping of incomplete profile -- if the
576 // the loop is a long running loop and dump is called in the middle
577 // of the loop, the result profile is incomplete.
578 // FIXME: add other heuristics to detect long running loops.
579 if (SkipRetExitBlock) {
580 for (auto *BB : ExitBlocks)
581 if (isa<ReturnInst>(BB->getTerminator()))
582 return false;
583 }
584
585 unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
586 if (MaxProm == 0)
587 return false;
588
589 [[maybe_unused]] auto *Ptr = LoopToCandidates.getPointerIntoBucketsArray();
590 unsigned Promoted = 0;
591 for (auto &Cand : LoopToCandidates[&L]) {
593 SSAUpdater SSA(&NewPHIs);
594 Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
595
596 // If BFI is set, we will use it to guide the promotions.
597 if (BFI) {
598 auto *BB = Cand.first->getParent();
599 auto InstrCount = BFI->getBlockProfileCount(BB);
600 if (!InstrCount)
601 continue;
602 auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
603 // If the average loop trip count is not greater than 1.5, we skip
604 // promotion.
605 if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2))
606 continue;
607 }
608
609 PGOCounterPromoterHelper Promoter(
610 Cand.first, Cand.second, SSA, InitVal, L.getLoopPreheader(),
611 ExitBlocks, InsertPts, LoopToCandidates, LI, IsAtomic);
612 Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
613
614 assert(LoopToCandidates.isPointerIntoBucketsArray(Ptr) &&
615 "References into LoopToCandidates might be invalid");
616 Cand = {nullptr, nullptr};
617
618 Promoted++;
619 if (Promoted >= MaxProm)
620 break;
621
622 (*NumPromoted)++;
623 if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
624 break;
625 }
626
627 LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
628 << L.getLoopDepth() << ")\n");
629 return Promoted != 0;
630 }
631
632private:
633 bool allowSpeculativeCounterPromotion(Loop *LP) {
634 SmallVector<BasicBlock *, 8> ExitingBlocks;
635 L.getExitingBlocks(ExitingBlocks);
636 // Not considierered speculative.
637 if (ExitingBlocks.size() == 1)
638 return true;
639 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
640 return false;
641 return true;
642 }
643
644 // Check whether the loop satisfies the basic conditions needed to perform
645 // Counter Promotions.
646 bool
647 isPromotionPossible(Loop *LP,
648 const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
649 // We can't insert into a catchswitch.
650 if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
651 return isa<CatchSwitchInst>(Exit->getTerminator());
652 }))
653 return false;
654
655 if (!LP->hasDedicatedExits())
656 return false;
657
658 BasicBlock *PH = LP->getLoopPreheader();
659 if (!PH)
660 return false;
661
662 return true;
663 }
664
665 // Returns the max number of Counter Promotions for LP.
666 unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
667 SmallVector<BasicBlock *, 8> LoopExitBlocks;
668 LP->getExitBlocks(LoopExitBlocks);
669 if (!isPromotionPossible(LP, LoopExitBlocks))
670 return 0;
671
672 SmallVector<BasicBlock *, 8> ExitingBlocks;
673 LP->getExitingBlocks(ExitingBlocks);
674
675 // If BFI is set, we do more aggressive promotions based on BFI.
676 if (BFI)
677 return (unsigned)-1;
678
679 // Not considierered speculative.
680 if (ExitingBlocks.size() == 1)
681 return MaxNumOfPromotionsPerLoop;
682
683 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
684 return 0;
685
686 // Whether the target block is in a loop does not matter:
687 if (SpeculativeCounterPromotionToLoop)
688 return MaxNumOfPromotionsPerLoop;
689
690 // Now check the target block:
691 unsigned MaxProm = MaxNumOfPromotionsPerLoop;
692 for (auto *TargetBlock : LoopExitBlocks) {
693 auto *TargetLoop = LI.getLoopFor(TargetBlock);
694 if (!TargetLoop)
695 continue;
696 unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
697 unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
698 MaxProm =
699 std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
700 PendingCandsInTarget);
701 }
702 return MaxProm;
703 }
704
705 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
706 SmallVector<BasicBlock *, 8> ExitBlocks;
707 SmallVector<Instruction *, 8> InsertPts;
708 Loop &L;
709 LoopInfo &LI;
710 BlockFrequencyInfo *BFI;
711 const bool IsAtomic; // Whether to convert counter updates to atomics.
712};
713
/// Selects which kind of value-profiling runtime call is emitted.
enum class ValueProfilingCallType {
  // Individual values are tracked. Currently used for indirect call target
  // profiling.
  Default,

  // MemOp: the memop size value profiling.
  MemOp
};
722
723} // end anonymous namespace
724
729 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
730 return FAM.getResult<TargetLibraryAnalysis>(F);
731 };
732 InstrLowerer Lowerer(M, Options, GetTLI, IsCS);
733 if (!Lowerer.lower())
734 return PreservedAnalyses::all();
735
737}
738
739//
740// Perform instrumentation sampling.
741//
742// There are 3 favors of sampling:
743// (1) Full burst sampling: We transform:
744// Increment_Instruction;
745// to:
746// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
747// Increment_Instruction;
748// }
749// __llvm_profile_sampling__ += 1;
750// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
751// __llvm_profile_sampling__ = 0;
752// }
753//
754// "__llvm_profile_sampling__" is a thread-local global shared by all PGO
755// counters (value-instrumentation and edge instrumentation).
756//
757// (2) Fast burst sampling:
758// "__llvm_profile_sampling__" variable is an unsigned type, meaning it will
759// wrap around to zero when overflows. In this case, the second check is
760// unnecessary, so we won't generate check2 when the SampledInstrPeriod is
761// set to 65536 (64K). The code after:
762// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
763// Increment_Instruction;
764// }
765// __llvm_profile_sampling__ += 1;
766//
767// (3) Simple sampling:
768// When SampledInstrBurstDuration is set to 1, we do a simple sampling:
769// __llvm_profile_sampling__ += 1;
770// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
771// __llvm_profile_sampling__ = 0;
772// Increment_Instruction;
773// }
774//
775// Note that, the code snippet after the transformation can still be counter
776// promoted. However, with sampling enabled, counter updates are expected to
777// be infrequent, making the benefits of counter promotion negligible.
778// Moreover, counter promotion can potentially cause issues in server
779// applications, particularly when the counters are dumped without a clean
780// exit. To mitigate this risk, counter promotion is disabled by default when
781// sampling is enabled. This behavior can be overridden using the internal
782// option.
783void InstrLowerer::doSampling(Instruction *I) {
784 if (!isSamplingEnabled())
785 return;
786
787 SampledInstrumentationConfig config = getSampledInstrumentationConfig();
788 auto GetConstant = [&config](IRBuilder<> &Builder, uint32_t C) {
789 if (config.UseShort)
790 return Builder.getInt16(C);
791 else
792 return Builder.getInt32(C);
793 };
794
795 IntegerType *SamplingVarTy;
796 if (config.UseShort)
797 SamplingVarTy = Type::getInt16Ty(M.getContext());
798 else
799 SamplingVarTy = Type::getInt32Ty(M.getContext());
800 auto *SamplingVar =
802 assert(SamplingVar && "SamplingVar not set properly");
803
804 // Create the condition for checking the burst duration.
805 Instruction *SamplingVarIncr;
806 Value *NewSamplingVarVal;
807 MDBuilder MDB(I->getContext());
808 MDNode *BranchWeight;
809 IRBuilder<> CondBuilder(I);
810 auto *LoadSamplingVar = CondBuilder.CreateLoad(SamplingVarTy, SamplingVar);
811 if (config.IsSimpleSampling) {
812 // For the simple sampling, just create the load and increments.
813 IRBuilder<> IncBuilder(I);
814 NewSamplingVarVal =
815 IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
816 SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
817 } else {
818 // For the burst-sampling, create the conditional update.
819 auto *DurationCond = CondBuilder.CreateICmpULE(
820 LoadSamplingVar, GetConstant(CondBuilder, config.BurstDuration - 1));
821 BranchWeight = MDB.createBranchWeights(
822 config.BurstDuration, config.Period - config.BurstDuration);
824 DurationCond, I, /* Unreachable */ false, BranchWeight);
825 IRBuilder<> IncBuilder(I);
826 NewSamplingVarVal =
827 IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
828 SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
829 I->moveBefore(ThenTerm->getIterator());
830 }
831
832 if (config.IsFastSampling)
833 return;
834
835 // Create the condition for checking the period.
836 Instruction *ThenTerm, *ElseTerm;
837 IRBuilder<> PeriodCondBuilder(SamplingVarIncr);
838 auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE(
839 NewSamplingVarVal, GetConstant(PeriodCondBuilder, config.Period));
840 BranchWeight = MDB.createBranchWeights(1, config.Period - 1);
841 SplitBlockAndInsertIfThenElse(PeriodCond, SamplingVarIncr, &ThenTerm,
842 &ElseTerm, BranchWeight);
843
844 // For the simple sampling, the counter update happens in sampling var reset.
845 if (config.IsSimpleSampling)
846 I->moveBefore(ThenTerm->getIterator());
847
848 IRBuilder<> ResetBuilder(ThenTerm);
849 ResetBuilder.CreateStore(GetConstant(ResetBuilder, 0), SamplingVar);
850 SamplingVarIncr->moveBefore(ElseTerm->getIterator());
851}
852
/// Lower every instrprof intrinsic found in \p F.
///
/// The intrinsics are first gathered into InstrProfInsts; each one is then
/// handed to doSampling() and dispatched to the matching lower*() routine
/// (MCDC bitmap-parameter intrinsics are simply erased). If anything was
/// lowered, promoteCounterLoadStores() is run on the function afterwards.
///
/// \returns true if at least one intrinsic was lowered or erased.
// NOTE(review): listing line 856 is absent from this copy; it presumably
// declares the InstrProfInsts buffer used below -- confirm against upstream.
853bool InstrLowerer::lowerIntrinsics(Function *F) {
854 bool MadeChange = false;
// Candidates are collected per function, so drop any left from a prior call.
855 PromotionCandidates.clear();
857
858 // To ensure compatibility with sampling, we save the intrinsics into
859 // a buffer to prevent potential breakage of the iterator (as the
860 // intrinsics will be moved to a different BB).
861 for (BasicBlock &BB : *F) {
862 for (Instruction &Instr : llvm::make_early_inc_range(BB)) {
863 if (auto *IP = dyn_cast<InstrProfInstBase>(&Instr))
864 InstrProfInsts.push_back(IP);
865 }
866 }
867
868 for (auto *Instr : InstrProfInsts) {
869 doSampling(Instr);
// InstrProfIncrementInstStep is tested before InstrProfIncrementInst;
// presumably the former derives from the latter, so the order matters.
870 if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Instr)) {
871 lowerIncrement(IPIS);
872 MadeChange = true;
873 } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Instr)) {
874 lowerIncrement(IPI);
875 MadeChange = true;
876 } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Instr)) {
877 lowerTimestamp(IPC);
878 MadeChange = true;
879 } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Instr)) {
880 lowerCover(IPC);
881 MadeChange = true;
882 } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Instr)) {
883 lowerValueProfileInst(IPVP);
884 MadeChange = true;
885 } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Instr)) {
// Nothing to emit here: lower() already created the bitmaps from this
// intrinsic via getOrCreateRegionBitmaps().
886 IPMP->eraseFromParent();
887 MadeChange = true;
888 } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Instr)) {
889 lowerMCDCTestVectorBitmapUpdate(IPBU);
890 MadeChange = true;
891 }
892 }
893
894 if (!MadeChange)
895 return false;
896
897 promoteCounterLoadStores(F);
898 return true;
899}
900
901bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {
902 // Mach-O don't support weak external references.
903 if (TT.isOSBinFormatMachO())
904 return false;
905
906 if (RuntimeCounterRelocation.getNumOccurrences() > 0)
907 return RuntimeCounterRelocation;
908
909 // Fuchsia uses runtime counter relocation by default.
910 return TT.isOSFuchsia();
911}
912
913bool InstrLowerer::isSamplingEnabled() const {
914 if (SampledInstr.getNumOccurrences() > 0)
915 return SampledInstr;
916 return Options.Sampling;
917}
918
919bool InstrLowerer::isCounterPromotionEnabled() const {
920 if (DoCounterPromotion.getNumOccurrences() > 0)
921 return DoCounterPromotion;
922 return Options.DoCounterPromotion;
923}
924
925bool InstrLowerer::isAtomic() const {
926 return Options.Atomic || AtomicCounterUpdateAll;
927}
928
/// Verification helper: abort if \p F still contains a plain load or store
/// whose address looks like it should have been converted to an atomic.
///
/// For every LoadInst/StoreInst in \p F the pointer operand is taken, its
/// in-bounds offsets are stripped, and the name of the resulting value is
/// tested against a prefix. A match is reported through report_fatal_error().
// NOTE(review): listing line 938, which carries the prefix argument of
// starts_with() and closes the condition, is absent from this copy.
929static void doAtomicCheck(Function *F) {
930 for (const llvm::Instruction &I : llvm::instructions(F)) {
931 const Value *Addr = nullptr;
// Operand 0 of a load and operand 1 of a store are the accessed pointers.
932 if (const LoadInst *LI = dyn_cast<LoadInst>(&I))
933 Addr = LI->getOperand(0);
934 else if (const StoreInst *LI = dyn_cast<StoreInst>(&I))
935 Addr = LI->getOperand(1);
936
937 if (Addr && Addr->stripInBoundsOffsets()->getName().starts_with(
939 LLVM_DEBUG(dbgs() << "Missed candidate: "; I.dump());
940 report_fatal_error("Candidate load/store not converted to atomic");
941 }
942 }
943}
944
/// Try to promote the counter load/store pairs recorded in
/// PromotionCandidates for \p F out of their enclosing loops.
///
/// Does nothing when counter promotion is disabled. Otherwise a dominator
/// tree and loop info are computed for \p F (plus block frequency info when
/// Options.UseBFIInPromotion is set), the candidates are grouped by the loop
/// LI.getLoopFor() reports for the load's block, and a PGOCounterPromoter is
/// run for each loop. Candidates that are not inside any loop are not
/// promoted; in atomic mode they are passed to makeAtomic() right away.
// NOTE(review): listing lines 973 (presumably the definition of `Loops`) and
// 984 (the statement guarded by the final `if`) are absent from this copy.
945void InstrLowerer::promoteCounterLoadStores(Function *F) {
946 if (!isCounterPromotionEnabled())
947 return;
948
949 DominatorTree DT(*F);
950 LoopInfo LI(DT);
951 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
952
// BFI stays null unless requested; the promoter receives BFI.get() below.
953 std::unique_ptr<BlockFrequencyInfo> BFI;
954 if (Options.UseBFIInPromotion) {
955 std::unique_ptr<BranchProbabilityInfo> BPI;
956 BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
957 BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
958 }
959
960 for (const auto &LoadStore : PromotionCandidates) {
961 auto *CounterLoad = LoadStore.first;
962 auto *CounterStore = LoadStore.second;
963 BasicBlock *BB = CounterLoad->getParent();
964 Loop *ParentLoop = LI.getLoopFor(BB);
965 if (!ParentLoop) {
// Not in a loop: nothing to hoist, only the atomic conversion remains.
966 if (isAtomic())
967 makeAtomic(CounterLoad, CounterStore);
968 continue;
969 }
970 LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
971 }
972
974
975 // Do a post-order traversal of the loops so that counter updates can be
976 // iteratively hoisted outside the loop nest.
977 for (auto *Loop : llvm::reverse(Loops)) {
978 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get(),
979 isAtomic());
980 Promoter.run(&TotalCountersPromoted);
981 }
982
983 if (isAtomic() && VerifyAtomicPromotion)
985}
986
// Body of a predicate that answers whether the runtime hook must always be
// emitted: false for Fuchsia targets, true for every other target.
// NOTE(review): the signature (listing line 987) is absent from this copy;
// judging from the call in lower() this is presumably
// needsRuntimeHookUnconditionally(TT) -- confirm against upstream.
988 // On Fuchsia, we only need runtime hook if any counters are present.
989 if (TT.isOSFuchsia())
990 return false;
991
992 return true;
993}
994
995/// Check if the module contains uses of any profiling intrinsics.
///
/// Only the cover, increment, increment_step, timestamp and value_profile
/// intrinsics are consulted. An intrinsic counts as present when its
/// declaration exists in the module and has at least one use.
// NOTE(review): the signature (listing line 996) is absent from this copy;
// the body refers to a module `M` and lower() calls
// containsProfilingIntrinsics(M).
997 auto containsIntrinsic = [&](int ID) {
998 if (auto *F = Intrinsic::getDeclarationIfExists(&M, ID))
999 return !F->use_empty();
1000 return false;
1001 };
1002 return containsIntrinsic(Intrinsic::instrprof_cover) ||
1003 containsIntrinsic(Intrinsic::instrprof_increment) ||
1004 containsIntrinsic(Intrinsic::instrprof_increment_step) ||
1005 containsIntrinsic(Intrinsic::instrprof_timestamp) ||
1006 containsIntrinsic(Intrinsic::instrprof_value_profile);
1007}
1008
/// Module-level driver of the lowering.
///
/// Steps, in order:
///  1. Emit the runtime hook up front when the target needs it
///     unconditionally.
///  2. Return early if the module has neither profiling intrinsics nor the
///     coverage "unused names" variable.
///  3. Pre-scan every function: count value-profiling sites, create MCDC
///     bitmaps, and create the region counters from the first counter
///     intrinsic that was picked.
///  4. Create vtable profile data for globals carrying type metadata.
///  5. Lower the intrinsics of every function and the coverage names.
///  6. If anything changed, emit value nodes, name data, vtable names, the
///     (conditional) runtime hook, registration, uses and initialization.
///
/// \returns true if the module was modified.
// NOTE(review): listing lines 1016 (statement guarded by the sampling `if`),
// 1036 (second half of the FirstProfInst condition) and 1052 (presumably a
// guard for the vtable loop) are absent from this copy.
1009bool InstrLowerer::lower() {
1010 bool MadeChange = false;
1011 bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT);
1012 if (NeedsRuntimeHook)
1013 MadeChange = emitRuntimeHook();
1014
1015 if (!IsCS && isSamplingEnabled())
1017
1018 bool ContainsProfiling = containsProfilingIntrinsics(M);
1019 GlobalVariable *CoverageNamesVar =
1020 M.getNamedGlobal(getCoverageUnusedNamesVarName());
1021 // Improve compile time by avoiding linear scans when there is no work.
1022 if (!ContainsProfiling && !CoverageNamesVar)
1023 return MadeChange;
1024
1025 // We did not know how many value sites there would be inside
1026 // the instrumented function. This is counting the number of instrumented
1027 // target value sites to enter it as field in the profile data variable.
1028 for (Function &F : M) {
1029 InstrProfCntrInstBase *FirstProfInst = nullptr;
1030 for (BasicBlock &BB : F) {
1031 for (auto I = BB.begin(), E = BB.end(); I != E; I++) {
1032 if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
1033 computeNumValueSiteCounts(Ind);
1034 else {
1035 if (FirstProfInst == nullptr &&
1037 FirstProfInst = dyn_cast<InstrProfCntrInstBase>(I);
1038 // If the MCDCBitmapParameters intrinsic is seen, create the bitmaps.
1039 if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(I))
1040 static_cast<void>(getOrCreateRegionBitmaps(Params));
1041 }
1042 }
1043 }
1044
1045 // Use a profile intrinsic to create the region counters and data variable.
1046 // Also create the data variable based on the MCDCParams.
1047 if (FirstProfInst != nullptr) {
1048 static_cast<void>(getOrCreateRegionCounters(FirstProfInst));
1049 }
1050 }
1051
1053 for (GlobalVariable &GV : M.globals())
1054 // Global variables with type metadata are virtual table variables.
1055 if (GV.hasMetadata(LLVMContext::MD_type))
1056 getOrCreateVTableProfData(&GV);
1057
1058 for (Function &F : M)
1059 MadeChange |= lowerIntrinsics(&F);
1060
1061 if (CoverageNamesVar) {
1062 lowerCoverageData(CoverageNamesVar);
1063 MadeChange = true;
1064 }
1065
1066 if (!MadeChange)
1067 return false;
1068
1069 emitVNodes();
1070 emitNameData();
1071 emitVTableNames();
1072
1073 // Emit runtime hook for the cases where the target does not unconditionally
1074 // require pulling in profile runtime, and coverage is enabled on code that is
1075 // not eliminated by the front-end, e.g. unused functions with internal
1076 // linkage.
1077 if (!NeedsRuntimeHook && ContainsProfiling)
1078 emitRuntimeHook();
1079
1080 emitRegistration();
1081 emitUses();
1082 emitInitialization();
1083 return true;
1084}
1085
// Returns (declaring it in \p M if necessary) the runtime function that
// records a profiled value. \p CallType selects between the default entry
// point and the MemOp one; any other value trips the assertion. The function
// returns void, and parameter index 2 receives the target's extension
// attribute for i32 parameters when TLI reports one.
// NOTE(review): listing lines 1086 (start of the signature), 1101 (the
// include that expands VALUE_PROF_FUNC_PARAM into ParamTypes) and 1106-1107
// (the two candidate function names) are absent from this copy. The name is
// taken from the calls in lowerValueProfileInst().
1087 Module &M, const TargetLibraryInfo &TLI,
1088 ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
// NOTE(review): Ctx is not used in the visible lines; presumably the macro
// expansion below refers to it.
1089 LLVMContext &Ctx = M.getContext();
1090 auto *ReturnTy = Type::getVoidTy(M.getContext());
1091
1092 AttributeList AL;
1093 if (auto AK = TLI.getExtAttrForI32Param(false))
1094 AL = AL.addParamAttribute(M.getContext(), 2, AK);
1095
1096 assert((CallType == ValueProfilingCallType::Default ||
1097 CallType == ValueProfilingCallType::MemOp) &&
1098 "Must be Default or MemOp");
1099 Type *ParamTypes[] = {
1100#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
1102 };
1103 auto *ValueProfilingCallTy =
1104 FunctionType::get(ReturnTy, ArrayRef(ParamTypes), false);
1105 StringRef FuncName = CallType == ValueProfilingCallType::Default
1108 return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL);
1109}
1110
1111void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
1112 GlobalVariable *Name = Ind->getName();
1113 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1114 uint64_t Index = Ind->getIndex()->getZExtValue();
1115 auto &PD = ProfileDataMap[Name];
1116 PD.NumValueSites[ValueKind] =
1117 std::max(PD.NumValueSites[ValueKind], (uint32_t)(Index + 1));
1118}
1119
/// Replace the value-profile intrinsic \p Ind with a call to the value
/// profiling runtime function.
///
/// The call receives the profiled target value, the function's profile data
/// variable (cast to a pointer in address space 0) and a flattened site
/// index. The flattened index is the intrinsic's own index plus the number of
/// value sites of all kinds that precede its kind. Operand bundles of the
/// intrinsic are copied onto the call, and the intrinsic is erased.
// NOTE(review): listing lines 1125 (the asserted condition), 1150 (the
// declaration of OpBundles) and 1166 are absent from this copy.
1120void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
1121 // TODO: Value profiling heavily depends on the data section which is omitted
1122 // in lightweight mode. We need to move the value profile pointer to the
1123 // Counter struct to get this working.
1124 assert(
1126 "Value profiling is not yet supported with lightweight instrumentation");
1127 GlobalVariable *Name = Ind->getName();
1128 auto It = ProfileDataMap.find(Name);
1129 assert(It != ProfileDataMap.end() && It->second.DataVar &&
1130 "value profiling detected in function with no counter increment");
1131
1132 GlobalVariable *DataVar = It->second.DataVar;
1133 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1134 uint64_t Index = Ind->getIndex()->getZExtValue();
// Sites of all kinds share one index space: skip past the lower kinds.
1135 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
1136 Index += It->second.NumValueSites[Kind];
1137
1138 IRBuilder<> Builder(Ind);
1139 bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() ==
1140 llvm::InstrProfValueKind::IPVK_MemOPSize);
1141 CallInst *Call = nullptr;
1142 auto *TLI = &GetTLI(*Ind->getFunction());
1143 auto *NormalizedDataVarPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1144 DataVar, PointerType::get(M.getContext(), 0));
1145
1146 // To support value profiling calls within Windows exception handlers, funclet
1147 // information contained within operand bundles needs to be copied over to
1148 // the library call. This is required for the IR to be processed by the
1149 // WinEHPrepare pass.
1151 Ind->getOperandBundlesAsDefs(OpBundles);
// Both branches pass the same three arguments; only the callee differs.
1152 if (!IsMemOpSize) {
1153 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1154 Builder.getInt32(Index)};
1155 Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args,
1156 OpBundles);
1157 } else {
1158 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1159 Builder.getInt32(Index)};
1160 Call = Builder.CreateCall(
1161 getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp),
1162 Args, OpBundles);
1163 }
// Mirror on the call site the i32 extension attribute for argument 2.
1164 if (auto AK = TLI->getExtAttrForI32Param(false))
1165 Call->addParamAttr(2, AK);
1167 Ind->eraseFromParent();
1168}
1169
/// Return the module's global variable named \p VarName, creating it first
/// if it does not exist.
///
/// A newly created variable is a zero-initialized, non-constant i64 with
/// linkonce_odr linkage. On targets that support COMDAT it is placed in a
/// COMDAT named after the variable.
// NOTE(review): listing line 1182 is absent from this copy.
1170GlobalVariable *InstrLowerer::getOrCreateBiasVar(StringRef VarName) {
1171 GlobalVariable *Bias = M.getGlobalVariable(VarName);
1172 if (Bias)
1173 return Bias;
1174
1175 Type *Int64Ty = Type::getInt64Ty(M.getContext());
1176
1177 // Compiler must define this variable when runtime counter relocation
1178 // is being used. Runtime has a weak external reference that is used
1179 // to check whether that's the case or not.
1180 Bias = new GlobalVariable(M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
1181 Constant::getNullValue(Int64Ty), VarName);
1183 // A definition that's weak (linkonce_odr) without being in a COMDAT
1184 // section wouldn't lead to link errors, but it would lead to a dead
1185 // data word from every TU but one. Putting it in COMDAT ensures there
1186 // will be exactly one data slot in the link.
1187 if (TT.supportsCOMDAT())
1188 Bias->setComdat(M.getOrInsertComdat(VarName));
1189
1190 return Bias;
1191}
1192
/// Compute the address of the counter that intrinsic \p I refers to,
/// emitting the required instructions immediately before \p I.
///
/// Without runtime counter relocation this is an in-bounds GEP into the
/// function's counter array at the intrinsic's index. With relocation, the
/// counter bias is added to that address through ptrtoint/add/inttoptr. The
/// bias is loaded once per function, at the start of the entry block, and
/// the load is cached in FunctionToProfileBiasMap.
// NOTE(review): listing line 1197, presumably the condition guarding the
// setAlignment() call below, is absent from this copy.
1193Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
1194 auto *Counters = getOrCreateRegionCounters(I);
1195 IRBuilder<> Builder(I);
1196
1198 Counters->setAlignment(Align(8));
1199
1200 auto *Addr = Builder.CreateConstInBoundsGEP2_32(
1201 Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue());
1202
1203 if (!isRuntimeCounterRelocationEnabled())
1204 return Addr;
1205
1206 Type *Int64Ty = Type::getInt64Ty(M.getContext());
1207 Function *Fn = I->getParent()->getParent();
// Reference into the map: assigning BiasLI below also fills the cache.
1208 LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];
1209 if (!BiasLI) {
1210 IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
1211 auto *Bias = getOrCreateBiasVar(getInstrProfCounterBiasVarName());
1212 BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias, "profc_bias");
1213 // Bias doesn't change after startup.
1214 BiasLI->setMetadata(LLVMContext::MD_invariant_load,
1215 MDNode::get(M.getContext(), {}));
1216 }
1217 auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), BiasLI);
1218 return Builder.CreateIntToPtr(Add, Addr->getType());
1219}
1220
/// Compute the base address of the MCDC bitmap that \p I updates.
///
/// Without runtime counter relocation this is the bitmap global itself.
/// With relocation, the bitmap bias is loaded at the start of the entry
/// block and added to the bitmap pointer right before \p I.
// NOTE(review): unlike getCounterAddress(), the bias load is not cached, so
// each call with relocation enabled emits another load into the entry block.
1221Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) {
1222 auto *Bitmaps = getOrCreateRegionBitmaps(I);
1223 if (!isRuntimeCounterRelocationEnabled())
1224 return Bitmaps;
1225
1226 // Put BiasLI onto the entry block.
1227 Type *Int64Ty = Type::getInt64Ty(M.getContext());
1228 Function *Fn = I->getFunction();
1229 IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
1230 auto *Bias = getOrCreateBiasVar(getInstrProfBitmapBiasVarName());
1231 auto *BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias, "profbm_bias");
1232 // Assume BiasLI invariant (in the function at least)
1233 BiasLI->setMetadata(LLVMContext::MD_invariant_load,
1234 MDNode::get(M.getContext(), {}));
1235
1236 // Add Bias to Bitmaps and put it before the intrinsic.
1237 IRBuilder<> Builder(I);
1238 return Builder.CreatePtrAdd(Bitmaps, BiasLI, "profbm_addr");
1239}
1240
/// Lower a coverage intrinsic to a store of i8 0 into its counter byte.
///
/// When ConditionalCounterUpdate is set, the byte is loaded first and the
/// store is placed in a new "then" block that executes only while the byte
/// is still non-zero. The intrinsic is erased afterwards.
1241void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) {
1242 auto *Addr = getCounterAddress(CoverInstruction);
1243 IRBuilder<> Builder(CoverInstruction);
1244 if (ConditionalCounterUpdate) {
// Split after the intrinsic so it stays in the block holding the check.
1245 Instruction *SplitBefore = CoverInstruction->getNextNode();
1246 auto &Ctx = CoverInstruction->getParent()->getContext();
1247 auto *Int8Ty = llvm::Type::getInt8Ty(Ctx);
1248 Value *Load = Builder.CreateLoad(Int8Ty, Addr, "pgocount");
1249 Value *Cmp = Builder.CreateIsNotNull(Load, "pgocount.ifnonzero");
1250 Instruction *ThenBranch =
1251 SplitBlockAndInsertIfThen(Cmp, SplitBefore, false);
// From here on the builder emits into the conditional block.
1252 Builder.SetInsertPoint(ThenBranch);
1253 }
1254
1255 // We store zero to represent that this block is covered.
1256 Builder.CreateStore(Builder.getInt8(0), Addr);
1257 CoverInstruction->eraseFromParent();
1258}
1259
/// Lower a timestamp intrinsic to a call of a runtime function that takes
/// the address of the function's counter slot, then erase the intrinsic.
///
/// The intrinsic must use index 0 (asserted). The callee returns void and
/// takes a single parameter of the counter address' type.
// NOTE(review): listing line 1270, which carries the arguments of
// getOrInsertFunction() (callee name and type), is absent from this copy.
1260void InstrLowerer::lowerTimestamp(
1261 InstrProfTimestampInst *TimestampInstruction) {
1262 assert(TimestampInstruction->getIndex()->isNullValue() &&
1263 "timestamp probes are always the first probe for a function");
1264 auto &Ctx = M.getContext();
1265 auto *TimestampAddr = getCounterAddress(TimestampInstruction);
1266 IRBuilder<> Builder(TimestampInstruction);
1267 auto *CalleeTy =
1268 FunctionType::get(Type::getVoidTy(Ctx), TimestampAddr->getType(), false);
1269 auto Callee = M.getOrInsertFunction(
1271 Builder.CreateCall(Callee, {TimestampAddr});
1272 TimestampInstruction->eraseFromParent();
1273}
1274
/// Lower a counter increment intrinsic, then erase it.
///
/// Three shapes are produced:
///  - GPU profiling targets: a call to a runtime function taking the counter
///    address, a second pointer argument and the step widened or truncated
///    to i64.
///  - Atomic mode with promotion disabled, or the first counter (index 0)
///    when AtomicFirstCounter is set: an atomic add.
///  - Otherwise: a plain load/add/store. With promotion enabled the
///    load/store pair is recorded in PromotionCandidates.
// NOTE(review): listing lines 1283-1284 (start of the Callee definition),
// 1289 (the value of `Uniform`) and 1299 (remaining arguments of
// CreateAtomicRMW) are absent from this copy.
1275void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) {
1276 auto *Addr = getCounterAddress(Inc);
1277 IRBuilder<> Builder(Inc);
1278 if (isGPUProfTarget(M)) {
1279 auto *Int64Ty = Builder.getInt64Ty();
1280 auto *PtrTy = Builder.getPtrTy();
1281 auto *CalleeTy = FunctionType::get(Type::getVoidTy(M.getContext()),
1282 {PtrTy, PtrTy, Int64Ty}, false);
1285 RTLIB::impl___llvm_profile_instrument_gpu),
1286 CalleeTy);
1287 Value *CastAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, PtrTy);
1288 Value *Uniform =
1290 Value *StepI64 =
1291 Builder.CreateZExtOrTrunc(Inc->getStep(), Int64Ty, "step.i64");
1292 Builder.CreateCall(Callee, {CastAddr, Uniform, StepI64});
1293 }
1294 // If promotion is enabled then delay generating atomic updates until
1295 // after promotion is done.
1296 else if ((!isCounterPromotionEnabled() && isAtomic()) ||
1297 (Inc->getIndex()->isNullValue() && AtomicFirstCounter)) {
1298 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
1300 } else {
1301 Value *IncStep = Inc->getStep();
1302 Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
1303 auto *Count = Builder.CreateAdd(Load, Inc->getStep());
1304 auto *Store = Builder.CreateStore(Count, Addr);
1305 if (isCounterPromotionEnabled())
1306 PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
1307 }
1308 Inc->eraseFromParent();
1309}
1310
/// Consume the coverage "unused names" array \p CoverageNamesVar.
///
/// Each operand of its initializer is stripped of pointer casts and must be
/// a global variable (asserted). That name variable is given private linkage
/// and appended to ReferencedNames. Operands that are constant expressions
/// have their references dropped. Finally \p CoverageNamesVar is erased.
// NOTE(review): listing line 1318, presumably the definition of `Name` from
// `V`, is absent from this copy.
1311void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
1312 ConstantArray *Names =
1313 cast<ConstantArray>(CoverageNamesVar->getInitializer());
1314 for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
1315 Constant *NC = Names->getOperand(I);
1316 Value *V = NC->stripPointerCasts();
1317 assert(isa<GlobalVariable>(V) && "Missing reference to function name");
1319
1320 Name->setLinkage(GlobalValue::PrivateLinkage);
1321 ReferencedNames.push_back(Name);
1322 if (isa<ConstantExpr>(NC))
1323 NC->dropAllReferences();
1324 }
1325 CoverageNamesVar->eraseFromParent();
1326}
1327
/// Lower an MCDC test-vector bitmap update into code that sets one bit of
/// the function's profile bitmap, then erase the intrinsic.
///
/// The bit number is the i32 loaded from the condition-bitmap address plus
/// the intrinsic's bitmap index. Its upper bits select the byte (>> 3) and
/// its low three bits select the bit within that byte. In atomic mode the
/// byte is only touched, with an atomic OR in a separate block marked
/// unlikely, when the bit is not already set in the value just loaded.
/// Otherwise a plain load/or/store is emitted.
// NOTE(review): listing lines 1329 (the parameter list declaring `Update`)
// and 1380 (remaining arguments of CreateAtomicRMW) are absent from this
// copy.
1328void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(
1330 auto &Ctx = M.getContext();
1331 IRBuilder<> Builder(Update);
1332 auto *Int8Ty = Type::getInt8Ty(Ctx);
1333 auto *Int32Ty = Type::getInt32Ty(Ctx);
1334 auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
1335 auto *BitmapAddr = getBitmapAddress(Update);
1336
1337 // Load Temp Val + BitmapIdx.
1338 // %mcdc.temp = load i32, ptr %mcdc.addr, align 4
1339 auto *Temp = Builder.CreateAdd(
1340 Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"),
1341 Update->getBitmapIndex());
1342
1343 // Calculate byte offset using div8.
1344 // %1 = lshr i32 %mcdc.temp, 3
1345 auto *BitmapByteOffset = Builder.CreateLShr(Temp, 0x3);
1346
1347 // Add byte offset to section base byte address.
1348 // %4 = getelementptr inbounds i8, ptr @__profbm_test, i32 %1
1349 auto *BitmapByteAddr =
1350 Builder.CreateInBoundsPtrAdd(BitmapAddr, BitmapByteOffset);
1351
1352 // Calculate bit offset into bitmap byte by using div8 remainder (AND 7)
1353 // %5 = and i32 %mcdc.temp, 7
1354 // %6 = trunc i32 %5 to i8
1355 auto *BitToSet = Builder.CreateTrunc(Builder.CreateAnd(Temp, 0x7), Int8Ty);
1356
1357 // Shift bit offset left to form a bitmap.
1358 // %7 = shl i8 1, %6
1359 auto *ShiftedVal = Builder.CreateShl(Builder.getInt8(0x1), BitToSet);
1360
1361 // Load profile bitmap byte.
1362 // %mcdc.bits = load i8, ptr %4, align 1
1363 auto *Bitmap = Builder.CreateLoad(Int8Ty, BitmapByteAddr, "mcdc.bits");
1364
1365 if (isAtomic()) {
1366 // If ((Bitmap & Val) != Val), then execute atomic (Bitmap |= Val).
1367 // Note, just-loaded Bitmap might not be up-to-date. Use it just for
1368 // early testing.
1369 auto *Masked = Builder.CreateAnd(Bitmap, ShiftedVal);
1370 auto *ShouldStore = Builder.CreateICmpNE(Masked, ShiftedVal);
1371
1372 // Assume updating will be rare.
1373 auto *Unlikely = MDBuilder(Ctx).createUnlikelyBranchWeights();
1374 Instruction *ThenBranch =
1375 SplitBlockAndInsertIfThen(ShouldStore, Update, false, Unlikely);
1376
1377 // Execute if (unlikely(ShouldStore)).
1378 Builder.SetInsertPoint(ThenBranch);
1379 Builder.CreateAtomicRMW(AtomicRMWInst::Or, BitmapByteAddr, ShiftedVal,
1381 } else {
1382 // Perform logical OR of profile bitmap byte and shifted bit offset.
1383 // %8 = or i8 %mcdc.bits, %7
1384 auto *Result = Builder.CreateOr(Bitmap, ShiftedVal);
1385
1386 // Store the updated profile bitmap byte.
1387 // store i8 %8, ptr %3, align 1
1388 Builder.CreateStore(Result, BitmapByteAddr);
1389 }
1390
1391 Update->eraseFromParent();
1392}
1393
1394/// Get the name of a profiling variable for a particular function.
///
/// The result is \p Prefix followed by the intrinsic's name variable with
/// the name-variable prefix removed. When the hash-based split applies, a
/// ".<FuncHash>" suffix is appended unless the name already ends with it.
///
/// \param Inc     Intrinsic whose name variable identifies the function.
/// \param Prefix  Prefix of the variable kind being named.
/// \param Renamed Set to false on the early-return path and to true
///                otherwise, including when the name already carried the
///                hash suffix and is returned without a new one.
// NOTE(review): listing lines 1402 (last operand of the early-return
// condition) and 1407 (the definition of FuncHash) are absent from this copy.
1395static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
1396 bool &Renamed) {
1397 StringRef NamePrefix = getInstrProfNameVarPrefix();
1398 StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
1399 Function *F = Inc->getParent()->getParent();
1400 Module *M = F->getParent();
1401 if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
1403 Renamed = false;
1404 return (Prefix + Name).str();
1405 }
1406 Renamed = true;
1408 SmallVector<char, 24> HashPostfix;
1409 if (Name.ends_with((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
1410 return (Prefix + Name).str();
1411 return (Prefix + Name + "." + Twine(FuncHash)).str();
1412}
1413
// Body of the predicate deciding whether a function's address is stored in
// its profile data. The checks run in this order:
//   - profile data not referenced by code            -> false
//   - linkage is none of linkonce, local or
//     available_externally                           -> true
//   - available_externally and alwaysinline          -> false
//   - local linkage and in a COMDAT                  -> false
//   - otherwise: true if the address is taken or the linkage is linkonce.
// NOTE(review): the signature (listing line 1414) is absent from this copy;
// getFuncAddrForProfData() calls it as shouldRecordFunctionAddr(Fn), and the
// body names its parameter `F`.
1415 // Only record function addresses if IR PGO is enabled or if clang value
1416 // profiling is enabled. Recording function addresses greatly increases object
1417 // file size, because it prevents the inliner from deleting functions that
1418 // have been inlined everywhere.
1419 if (!profDataReferencedByCode(*F->getParent()))
1420 return false;
1421
1422 // Check the linkage
1423 bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
1424 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
1425 !HasAvailableExternallyLinkage)
1426 return true;
1427
1428 // A function marked 'alwaysinline' with available_externally linkage can't
1429 // have its address taken. Doing so would create an undefined external ref to
1430 // the function, which would fail to link.
1431 if (HasAvailableExternallyLinkage &&
1432 F->hasFnAttribute(Attribute::AlwaysInline))
1433 return false;
1434
1435 // Prohibit function address recording if the function is both internal and
1436 // COMDAT. This avoids the profile data variable referencing internal symbols
1437 // in COMDAT.
1438 if (F->hasLocalLinkage() && F->hasComdat())
1439 return false;
1440
1441 // Check uses of this function for other than direct calls or invokes to it.
1442 // Inline virtual functions have linkOnceODR linkage. When a key method
1443 // exists, the vtable will only be emitted in the TU where the key method
1444 // is defined. In a TU where vtable is not available, the function won't
1445 // be 'addresstaken'. If its address is not recorded here, the profile data
1446 // with missing address may be picked by the linker leading to missing
1447 // indirect call target info.
1448 return F->hasAddressTaken() || F->hasLinkOnceLinkage();
1449}
1450
/// Decide whether profile data must reference \p Fn directly instead of
/// through an alias.
///
/// \returns true for declarations, functions with local linkage, functions
/// carrying type metadata, and certain COMDAT functions; false when an alias
/// may be used.
// NOTE(review): listing line 1475, the second operand of the COMDAT
// condition, is absent from this copy.
1451static inline bool shouldUsePublicSymbol(Function *Fn) {
1452 // It isn't legal to make an alias of this function at all
1453 if (Fn->isDeclarationForLinker())
1454 return true;
1455
1456 // Symbols with local linkage can just use the symbol directly without
1457 // introducing relocations
1458 if (Fn->hasLocalLinkage())
1459 return true;
1460
1461 // PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some
1462 // unfavorable interaction between the new alias and the alias renaming done
1463 // in LowerTypeTests under ThinLTO. For comdat functions that would normally
1464 // be deduplicated, but the renaming scheme ends up preventing renaming, since
1465 // it creates unique names for each alias, resulting in duplicated symbols. In
1466 // the future, we should update the CFI related passes to migrate these
1467 // aliases to the same module as the jump-table they refer to will be defined.
1468 if (Fn->hasMetadata(LLVMContext::MD_type))
1469 return true;
1470
1471 // For comdat functions, an alias would need the same linkage as the original
1472 // function and hidden visibility. There is no point in adding an alias with
1473 // identical linkage and visibility to avoid introducing symbolic relocations.
1474 if (Fn->hasComdat() &&
1476 return true;
1477
1478 // It's OK to use an alias
1479 return false;
1480}
1481
// Body of the routine that picks the constant stored as a function's address
// in its profile data:
//   - a null pointer when shouldRecordFunctionAddr() says no,
//   - the function itself when shouldUsePublicSymbol() says so, or for
//     certain functions on GPU profiling targets,
//   - otherwise the alias `GA` named "<function name>.local", whose linkage
//     is adjusted for COMDAT functions.
// NOTE(review): listing lines 1482 (the signature; the body names its
// parameter `Fn`), 1496 (second operand of the GPU condition), 1500 (the
// creation of `GA`) and 1515 (second statement of the COMDAT branch) are
// absent from this copy.
1483 auto *Int8PtrTy = PointerType::getUnqual(Fn->getContext());
1484 // Store a nullptr in __llvm_profd, if we shouldn't use a real address
1485 if (!shouldRecordFunctionAddr(Fn))
1486 return ConstantPointerNull::get(Int8PtrTy);
1487
1488 // If we can't use an alias, we must use the public symbol, even though this
1489 // may require a symbolic relocation.
1490 if (shouldUsePublicSymbol(Fn))
1491 return Fn;
1492
1493 // For GPU targets, weak functions cannot use private aliases because
1494 // LTO may pick a different TU's copy, leaving the alias undefined
1495 if (isGPUProfTarget(*Fn->getParent()) &&
1497 return Fn;
1498
1499 // When possible use a private alias to avoid symbolic relocations.
1501 Fn->getName() + ".local", Fn);
1502
1503 // When the instrumented function is a COMDAT function, we cannot use a
1504 // private alias. If we did, we would create reference to a local label in
1505 // this function's section. If this version of the function isn't selected by
1506 // the linker, then the metadata would introduce a reference to a discarded
1507 // section. So, for COMDAT functions, we need to adjust the linkage of the
1508 // alias. Using hidden visibility avoids a dynamic relocation and an entry in
1509 // the dynamic symbol table.
1510 //
1511 // Note that this handles COMDAT functions with visibility other than Hidden,
1512 // since that case is covered in shouldUsePublicSymbol()
1513 if (Fn->hasComdat()) {
1514 GA->setLinkage(Fn->getLinkage());
1516 }
1517
1518 // appendToCompilerUsed(*Fn->getParent(), {GA});
1519
1520 return GA;
1521}
1522
// Body of a predicate on the target triple `TT`: true for NVPTX, false for
// the ELF, COFF, Mach-O, XCOFF and Wasm object formats, true for anything
// else. Per the comments below, false means the linker provides the section
// bounds that compiler-rt needs.
// NOTE(review): the signature (listing line 1523) is absent from this copy
// and no caller is visible in this part of the file.
1524 // NVPTX is an ELF target but PTX does not expose sections or linker symbols.
1525 if (TT.isNVPTX())
1526 return true;
1527
1528 // compiler-rt uses linker support to get data/counters/name start/end for
1529 // ELF, COFF, Mach-O, XCOFF, and Wasm.
1530 if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() ||
1531 TT.isOSBinFormatMachO() || TT.isOSBinFormatXCOFF() ||
1532 TT.isOSBinFormatWasm())
1533 return false;
1534
1535 return true;
1536}
1537
/// Put the lowered variable \p GV into a COMDAT group when required.
///
/// A COMDAT is used when needsComdatForCounter() says so for \p GO, and
/// always on ELF. The group is named \p CounterGroupName, except on COFF
/// with DataReferencedByCode set, where it is named after \p GV itself. When
/// the COMDAT is used only because the target is ELF, its selection kind is
/// set to NoDeduplicate.
///
/// \param GV               Variable to place in the COMDAT.
/// \param GO               Function or global variable \p GV belongs to.
/// \param CounterGroupName Default name of the COMDAT group.
// NOTE(review): listing line 1579, the statement guarded by the final `if`,
// is absent from this copy.
1538void InstrLowerer::maybeSetComdat(GlobalVariable *GV, GlobalObject *GO,
1539 StringRef CounterGroupName) {
1540 // Place lowered global variables in a comdat group if the associated function
1541 // or global variable is a COMDAT. This will make sure that only one copy of
1542 // global variable (e.g. function counters) of the COMDAT function will be
1543 // emitted after linking.
1544 bool NeedComdat = needsComdatForCounter(*GO, M);
1545 bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
1546
1547 if (!UseComdat)
1548 return;
1549
1550 // Keep in mind that this pass may run before the inliner, so we need to
1551 // create a new comdat group (for counters, profiling data, etc). If we use
1552 // the comdat of the parent function, that will result in relocations against
1553 // discarded sections.
1554 //
1555 // If the data variable is referenced by code, non-counter variables (notably
1556 // profiling data) and counters have to be in different comdats for COFF
1557 // because the Visual C++ linker will report duplicate symbol errors if there
1558 // are multiple external symbols with the same name marked
1559 // IMAGE_COMDAT_SELECT_ASSOCIATIVE.
1560 StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode
1561 ? GV->getName()
1562 : CounterGroupName;
1563 Comdat *C = M.getOrInsertComdat(GroupName);
1564
1565 if (!NeedComdat) {
1566 // Object file format must be ELF since `UseComdat && !NeedComdat` is true.
1567 //
1568 // For ELF, when not using COMDAT, put counters, data and values into a
1569 // nodeduplicate COMDAT which is lowered to a zero-flag section group. This
1570 // allows -z start-stop-gc to discard the entire group when the function is
1571 // discarded.
1572 C->setSelectionKind(Comdat::NoDeduplicate);
1573 }
1574 GV->setComdat(C);
1575 // COFF doesn't allow the comdat group leader to have private linkage, so
1576 // upgrade private linkage to internal linkage to produce a symbol table
1577 // entry.
1578 if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())
1580}
1581
// Body of the predicate deciding whether a vtable's address is stored in its
// profile data: false when profile data is not referenced by code, and false
// for a local-linkage variable that is in a COMDAT.
// NOTE(review): the signature (listing line 1582) and the last operand of
// the linkage condition (listing line 1587) are absent from this copy;
// getVTableAddrForProfData() calls this as shouldRecordVTableAddr(GV).
1583 if (!profDataReferencedByCode(*GV->getParent()))
1584 return false;
1585
1586 if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() &&
1588 return true;
1589
1590 // This avoids the profile data from referencing internal symbols in
1591 // COMDAT.
1592 if (GV->hasLocalLinkage() && GV->hasComdat())
1593 return false;
1594
1595 return true;
1596}
1597
// Picks the constant stored as a vtable's address in its profile data: the
// variable itself when its address should be recorded.
// NOTE(review): the signature (listing line 1600) and the statement executed
// when the address must not be recorded (listing line 1603; a null pointer
// according to the comment below) are absent from this copy.
// getOrCreateVTableProfData() calls this as getVTableAddrForProfData(GV).
1598// FIXME: Introduce an internal alias like what's done for functions to reduce
1599// the number of relocation entries.
1601 // Store a nullptr in __profvt_ if a real address shouldn't be used.
1602 if (!shouldRecordVTableAddr(GV))
1604
1605 return GV;
1606}
1607
/// Create the profile data variable for the vtable \p GV, unless one exists.
///
/// Globals whose name starts with "llvm.", "__llvm" or "__prof" are skipped.
/// The new variable is a struct built from the INSTR_PROF_VTABLE_DATA
/// description. It is named with the vtable variable prefix followed by the
/// PGO name of \p GV, placed in the IPSK_vtab section with 8-byte alignment,
/// and possibly put in a COMDAT by maybeSetComdat(). It is recorded in
/// VTableDataMap and UsedVars, and \p GV is appended to ReferencedVTables.
// NOTE(review): several lines are absent from this copy: listing lines 1609
// and 1611 (the assertion head and the condition of the first early return),
// 1625-1626 (presumably the definitions of Linkage and Visibility), 1631, and
// 1638 / 1653 (the includes that expand INSTR_PROF_VTABLE_DATA).
1608void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {
1610 "Value profiling is not supported with lightweight instrumentation");
1612 return;
1613
1614 // Skip llvm internal global variable or __prof variables.
1615 if (GV->getName().starts_with("llvm.") ||
1616 GV->getName().starts_with("__llvm") ||
1617 GV->getName().starts_with("__prof"))
1618 return;
1619
1620 // VTableProfData already created
1621 auto It = VTableDataMap.find(GV);
1622 if (It != VTableDataMap.end() && It->second)
1623 return;
1624
1627
1628 // This is to keep consistent with per-function profile data
1629 // for correctness.
1630 if (TT.isOSBinFormatXCOFF()) {
1632 Visibility = GlobalValue::DefaultVisibility;
1633 }
1634
1635 LLVMContext &Ctx = M.getContext();
1636 Type *DataTypes[] = {
1637#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType,
1639#undef INSTR_PROF_VTABLE_DATA
1640 };
1641
1642 auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));
1643
1644 // Used by INSTR_PROF_VTABLE_DATA MACRO
1645 Constant *VTableAddr = getVTableAddrForProfData(GV);
1646 const std::string PGOVTableName = getPGOName(*GV);
1647 // Record the length of the vtable. This is needed since vtable pointers
1648 // loaded from C++ objects might be from the middle of a vtable definition.
1649 uint32_t VTableSizeVal = GV->getGlobalSize(M.getDataLayout());
1650
1651 Constant *DataVals[] = {
1652#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init,
1654#undef INSTR_PROF_VTABLE_DATA
1655 };
1656
1657 auto *Data =
1658 new GlobalVariable(M, DataTy, /*constant=*/false, Linkage,
1659 ConstantStruct::get(DataTy, DataVals),
1660 getInstrProfVTableVarPrefix() + PGOVTableName);
1661
1662 Data->setVisibility(Visibility);
1663 Data->setSection(getInstrProfSectionName(IPSK_vtab, TT.getObjectFormat()));
1664 Data->setAlignment(Align(8));
1665
1666 maybeSetComdat(Data, GV, Data->getName());
1667
1668 VTableDataMap[GV] = Data;
1669
1670 ReferencedVTables.push_back(GV);
1671
1672 // VTable <Hash, Addr> is used by runtime but not referenced by other
1673 // sections. Conservatively mark it linker retained.
1674 UsedVars.push_back(Data);
1675}
1676
/// Create the counter array (IPSK_cnts) or bitmap array (IPSK_bitmap) for
/// the function containing \p Inc and place it in the matching section.
///
/// The variable is created by createRegionCounters() or
/// createRegionBitmaps() under the name getVarName() produces. It takes the
/// visibility of the name variable (DefaultVisibility on XCOFF), is put in
/// the section for \p IPSK, and is assigned a COMDAT: one named after the
/// variable on GPU profiling targets when it has none yet, otherwise
/// whatever maybeSetComdat() decides. Any other \p IPSK is unreachable.
///
/// \returns the newly created global variable.
// NOTE(review): several lines are absent from this copy: listing lines 1683
// (presumably the definition of Linkage), 1688 and 1690 (start and body of
// the debug-info-correlation linkage adjustment), 1698 (first statement of
// the XCOFF branch), 1709 and 1715 (the casts producing CntrIncrement and
// BitmapUpdate), and 1726-1727.
1677GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
1678 InstrProfSectKind IPSK) {
1679 GlobalVariable *NamePtr = Inc->getName();
1680
1681 // Match the linkage and visibility of the name global.
1682 Function *Fn = Inc->getParent()->getParent();
1684 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
1685
1686 // Use internal rather than private linkage so the counter variable shows up
1687 // in the symbol table when using debug info for correlation.
1689 TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)
1691
1692 // Due to the limitation of binder as of 2021/09/28, the duplicate weak
1693 // symbols in the same csect won't be discarded. When there are duplicate weak
1694 // symbols, we can NOT guarantee that the relocations get resolved to the
1695 // intended weak symbol, so we can not ensure the correctness of the relative
1696 // CounterPtr, so we have to use private linkage for counter and data symbols.
1697 if (TT.isOSBinFormatXCOFF()) {
1699 Visibility = GlobalValue::DefaultVisibility;
1700 }
1701 // Move the name variable to the right section.
1702 bool Renamed;
1703 GlobalVariable *Ptr;
1704 StringRef VarPrefix;
1705 std::string VarName;
1706 if (IPSK == IPSK_cnts) {
1707 VarPrefix = getInstrProfCountersVarPrefix();
1708 VarName = getVarName(Inc, VarPrefix, Renamed);
1710 Ptr = createRegionCounters(CntrIncrement, VarName, Linkage);
1711 } else if (IPSK == IPSK_bitmap) {
1712 VarPrefix = getInstrProfBitmapVarPrefix();
1713 VarName = getVarName(Inc, VarPrefix, Renamed);
1714 InstrProfMCDCBitmapInstBase *BitmapUpdate =
1716 Ptr = createRegionBitmaps(BitmapUpdate, VarName, Linkage);
1717 } else {
1718 llvm_unreachable("Profile Section must be for Counters or Bitmaps");
1719 }
1720
1721 Ptr->setVisibility(Visibility);
1722 Ptr->setSection(getInstrProfSectionName(IPSK, TT.getObjectFormat()));
1723 Ptr->setLinkage(Linkage);
1724 if (isGPUProfTarget(M) && !Ptr->hasComdat()) {
1725 Ptr->setComdat(M.getOrInsertComdat(VarName));
1728 } else {
1729 maybeSetComdat(Ptr, Fn, VarName);
1730 }
1731 return Ptr;
1732}
1733
/// Create the zero-initialized MCDC bitmap global for \p Inc: an array of
/// Inc->getNumBitmapBytes() i8 elements named \p Name, non-constant, with
/// alignment 1.
///
/// \returns the new global variable.
// NOTE(review): listing lines 1734 (the return type) and 1737 (the rest of
// the parameter list, which must declare `Linkage`) are absent from this
// copy.
1735InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
1736 StringRef Name,
1738 uint64_t NumBytes = Inc->getNumBitmapBytes();
1739 auto *BitmapTy = ArrayType::get(Type::getInt8Ty(M.getContext()), NumBytes);
1740 auto GV = new GlobalVariable(M, BitmapTy, false, Linkage,
1741 Constant::getNullValue(BitmapTy), Name);
1742 GV->setAlignment(Align(1));
1743 return GV;
1744}
1745
/// Return the MCDC bitmap variable of the function named by \p Inc,
/// creating it through setupProfileSection() on first use.
///
/// The result and the bitmap size in bytes are cached in the ProfileDataMap
/// entry keyed by the intrinsic's name variable.
// NOTE(review): listing line 1746 (the return type) is absent from this
// copy.
1747InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {
1748 GlobalVariable *NamePtr = Inc->getName();
// operator[] creates the entry if the function has none yet.
1749 auto &PD = ProfileDataMap[NamePtr];
1750 if (PD.RegionBitmaps)
1751 return PD.RegionBitmaps;
1752
1753 // If RegionBitmaps doesn't already exist, create it by first setting up
1754 // the corresponding profile section.
1755 auto *BitmapPtr = setupProfileSection(Inc, IPSK_bitmap);
1756 PD.RegionBitmaps = BitmapPtr;
1757 PD.NumBitmapBytes = Inc->getNumBitmapBytes();
1758 return PD.RegionBitmaps;
1759}
1760
// Builds the counter array global for Inc, named Name and created with the
// given Linkage. Two layouts are produced:
//  - InstrProfCoverInst: an i8 array whose elements are all-ones, aligned to 1.
//  - anything else: a zero-initialized i64 array, aligned to 8.
// The element count comes from Inc->getNumCounters().
1762InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,
1764 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1765 auto &Ctx = M.getContext();
1766 GlobalVariable *GV;
1767 if (isa<InstrProfCoverInst>(Inc)) {
1768 auto *CounterTy = Type::getInt8Ty(Ctx);
1769 auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters);
1770 // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
 // Until then the all-ones initializer is built element by element.
1771 std::vector<Constant *> InitialValues(NumCounters,
1772 Constant::getAllOnesValue(CounterTy));
1773 GV = new GlobalVariable(M, CounterArrTy, false, Linkage,
1774 ConstantArray::get(CounterArrTy, InitialValues),
1775 Name);
1776 GV->setAlignment(Align(1));
1777 } else {
1778 auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
1779 GV = new GlobalVariable(M, CounterTy, false, Linkage,
1780 Constant::getNullValue(CounterTy), Name);
1781 GV->setAlignment(Align(8));
1782 }
1783 return GV;
1784}
1785
// Returns the counter global associated with Inc's name variable. The result
// is cached in ProfileDataMap; on first use the counters are created through
// setupProfileSection(Inc, IPSK_cnts), optionally described in debug info (see
// below), and createDataVariable(Inc) is called before returning.
1787InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
1788 GlobalVariable *NamePtr = Inc->getName();
1789 auto &PD = ProfileDataMap[NamePtr];
1790 if (PD.RegionCounters)
1791 return PD.RegionCounters;
1792
1793 // If RegionCounters doesn't already exist, create it by first setting up
1794 // the corresponding profile section.
1795 auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts);
1796 PD.RegionCounters = CounterPtr;
1797
1799 LLVMContext &Ctx = M.getContext();
1800 Function *Fn = Inc->getParent()->getParent();
 // Only functions that carry a DISubprogram get a debug-info description of
 // their counter variable.
1801 if (auto *SP = Fn->getSubprogram()) {
1802 DIBuilder DB(M, true, SP->getUnit());
1803 Metadata *FunctionNameAnnotation[] = {
1806 };
1807 Metadata *CFGHashAnnotation[] = {
1810 };
1811 Metadata *NumCountersAnnotation[] = {
1814 };
1815 auto Annotations = DB.getOrCreateArray({
1816 MDNode::get(Ctx, FunctionNameAnnotation),
1817 MDNode::get(Ctx, CFGHashAnnotation),
1818 MDNode::get(Ctx, NumCountersAnnotation),
1819 });
 // The three annotation nodes are attached to a global-variable expression
 // scoped to the subprogram and named after the counter variable.
1820 auto *DICounter = DB.createGlobalVariableExpression(
1821 SP, CounterPtr->getName(), /*LinkageName=*/StringRef(), SP->getFile(),
1822 /*LineNo=*/0, DB.createUnspecifiedType("Profile Data Type"),
1823 CounterPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr,
1824 /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,
1825 Annotations);
1826 CounterPtr->addDebugInfo(DICounter);
1827 DB.finalize();
1828 }
1829
1830 // Mark the counter variable as used so that it isn't optimized out.
1831 CompilerUsedVars.push_back(PD.RegionCounters);
1832 }
1833
1834 // Create the data variable (if it doesn't already exist).
1835 createDataVariable(Inc);
1836
1837 return PD.RegionCounters;
1838}
1839
// Creates the per-function profile data variable for Inc's name variable, at
// most once (PD.DataVar caches it). Along the way it may also create a
// statically allocated array with one i64 slot per value site, picks linkage
// and visibility for the data variable, encodes the counter/bitmap references
// as absolute or data-relative integers, places the variable in its section,
// and records the name variable in ReferencedNames.
1840void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
1841 // When debug information is correlated to profile data, a data variable
1842 // is not needed.
1844 return;
1845
1846 GlobalVariable *NamePtr = Inc->getName();
1847 auto &PD = ProfileDataMap[NamePtr];
1848
1849 // Return if data variable was already created.
1850 if (PD.DataVar)
1851 return;
1852
1853 LLVMContext &Ctx = M.getContext();
1854
1855 Function *Fn = Inc->getParent()->getParent();
1857 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
1858
1859 // Due to the limitation of binder as of 2021/09/28, the duplicate weak
1860 // symbols in the same csect won't be discarded. When there are duplicate weak
1861 // symbols, we can NOT guarantee that the relocations get resolved to the
1862 // intended weak symbol, so we can not ensure the correctness of the relative
1863 // CounterPtr, so we have to use private linkage for counter and data symbols.
1864 if (TT.isOSBinFormatXCOFF()) {
1866 Visibility = GlobalValue::DefaultVisibility;
1867 }
1868
1869 bool NeedComdat = needsComdatForCounter(*Fn, M);
1870 bool Renamed;
1871
1872 // The Data Variable section is anchored to profile counters.
1873 std::string CntsVarName =
1875 std::string DataVarName =
1876 getVarName(Inc, getInstrProfDataVarPrefix(), Renamed);
1877
1878 auto *Int8PtrTy = PointerType::getUnqual(Ctx);
1879 // Allocate statically the array of pointers to value profile nodes for
1880 // the current function.
1881 Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
 // NS is the total number of value sites across all value kinds.
1882 uint64_t NS = 0;
1883 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1884 NS += PD.NumValueSites[Kind];
1885 if (NS > 0 && ValueProfileStaticAlloc &&
1887 ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
1888 auto *ValuesVar = new GlobalVariable(
1889 M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
1890 getVarName(Inc, getInstrProfValuesVarPrefix(), Renamed));
1891 ValuesVar->setVisibility(Visibility);
1892 setGlobalVariableLargeSection(TT, *ValuesVar);
1893 ValuesVar->setSection(
1894 getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
1895 ValuesVar->setAlignment(Align(8));
1896 maybeSetComdat(ValuesVar, Fn, CntsVarName);
1898 ValuesVar, PointerType::get(Fn->getContext(), 0));
1899 }
1900
1901 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1902
1903 Constant *CounterPtr = PD.RegionCounters;
1904
1905 uint64_t NumBitmapBytes = PD.NumBitmapBytes;
1906
1907 // Create data variable.
1908 auto *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());
1909 auto *Int16Ty = Type::getInt16Ty(Ctx);
1910 auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
1911 auto *DataTy = getProfileDataTy();
1912
1913 Constant *FunctionAddr = getFuncAddrForProfData(Fn);
1914
 // Per-kind value-site counts, stored as i16 constants.
1915 Constant *Int16ArrayVals[IPVK_Last + 1];
1916 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1917 Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
1918
1919 if (isGPUProfTarget(M)) {
1920 // For GPU targets, weak functions need weak linkage for their profile data
1921 // aliases to allow linker deduplication across TUs
1923 Linkage = Fn->getLinkage();
1924 else
1927 }
1928 // If the data variable is not referenced by code (if we don't emit
1929 // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
1930 // data variable live under linker GC, the data variable can be private. This
1931 // optimization applies to ELF.
1932 //
1933 // On COFF, a comdat leader cannot be local so we require DataReferencedByCode
1934 // to be false.
1935 //
1936 // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
1937 // that other copies must have the same CFG and cannot have value profiling.
1938 // If no hash suffix, other profd copies may be referenced by code.
1939 if (!isGPUProfTarget(M) && NS == 0 &&
1940 !(DataReferencedByCode && NeedComdat && !Renamed) &&
1941 (TT.isOSBinFormatELF() ||
1942 (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
1944 Visibility = GlobalValue::DefaultVisibility;
1945 }
1946 // GPU-target ELF objects are always ET_DYN, so non-local symbols with
1947 // default visibility are preemptible. The CounterPtr label difference
1948 // emits a REL32 relocation that lld rejects against preemptible targets.
1949 if (TT.isGPU() && TT.isOSBinFormatELF() &&
 // The data variable is created without an initializer first, because the
 // relative-pointer expressions below refer to the variable itself.
1952 auto *Data =
1953 new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);
1954
1955 Constant *RelativeCounterPtr;
1956 GlobalVariable *BitmapPtr = PD.RegionBitmaps;
1957 Constant *RelativeBitmapPtr = ConstantInt::get(IntPtrTy, 0);
1958 InstrProfSectKind DataSectionKind;
1959 // With binary profile correlation, profile data is not loaded into memory.
1960 // profile data must reference profile counter with an absolute relocation.
1962 DataSectionKind = IPSK_covdata;
1963 RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy);
1964 if (BitmapPtr != nullptr)
1965 RelativeBitmapPtr = ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy);
1966 } else if (TT.isNVPTX()) {
1967 // The NVPTX target cannot handle self-referencing constant expressions in
1968 // global initializers at all. Use absolute pointers and have the runtime
1969 // registration convert them to relative offsets.
1970 DataSectionKind = IPSK_data;
1971 RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy);
1972 } else {
1973 // Reference the counter variable with a label difference (link-time
1974 // constant).
1975 DataSectionKind = IPSK_data;
1976 RelativeCounterPtr =
1977 ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy),
1978 ConstantExpr::getPtrToInt(Data, IntPtrTy));
1979 if (BitmapPtr != nullptr)
1980 RelativeBitmapPtr =
1982 ConstantExpr::getPtrToInt(Data, IntPtrTy));
1983 }
1984
 // The struct fields are produced by expanding INSTR_PROF_DATA, taking the
 // "Init" expression of each field.
1985 Constant *DataVals[] = {
1986#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
1988 };
1989 Data->setInitializer(ConstantStruct::get(DataTy, DataVals));
1990
1991 Data->setVisibility(Visibility);
1992 Data->setSection(
1993 getInstrProfSectionName(DataSectionKind, TT.getObjectFormat()));
1994 Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
1995 if (isGPUProfTarget(M) && !Data->hasComdat()) {
1996 Data->setComdat(M.getOrInsertComdat(CntsVarName));
1998 } else {
1999 maybeSetComdat(Data, Fn, CntsVarName);
2000 }
2001
2002 PD.DataVar = Data;
2003
2004 // Mark the data variable as used so that it isn't stripped out.
2005 CompilerUsedVars.push_back(Data);
2006 // Now that the linkage set by the FE has been passed to the data and counter
2007 // variables, reset Name variable's linkage and visibility to private so that
2008 // it can be removed later by the compiler.
2010 // Collect the referenced names to be used by emitNameData.
2011 ReferencedNames.push_back(NamePtr);
2012}
2013
// Statically allocates the array of value-profile nodes as one private global
// in the IPSK_vnodes section. Does nothing when ValueProfileStaticAlloc is off
// or when no function in ProfileDataMap has any value site. The new global is
// appended to UsedVars.
2014void InstrLowerer::emitVNodes() {
2015 if (!ValueProfileStaticAlloc)
2016 return;
2017
2018 // For now only support this on platforms that do
2019 // not require runtime registration to discover
2020 // named section start/end.
2022 return;
2023
 // Sum the value sites of every kind over all profiled functions.
2024 size_t TotalNS = 0;
2025 for (auto &PD : ProfileDataMap) {
2026 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
2027 TotalNS += PD.second.NumValueSites[Kind];
2028 }
2029
2030 if (!TotalNS)
2031 return;
2032
2033 uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
2034// Heuristic for small programs with very few total value sites.
2035// The default value of vp-counters-per-site is chosen based on
2036// the observation that large apps usually have a low percentage
2037// of value sites that actually have any profile data, and thus
2038// the average number of counters per site is low. For small
2039// apps with very few sites, this may not be true. Bump up the
2040// number of counters in this case.
2041#define INSTR_PROF_MIN_VAL_COUNTS 10
2044
2045 auto &Ctx = M.getContext();
 // The node struct layout is produced by expanding INSTR_PROF_VALUE_NODE,
 // taking the LLVM type of each field.
2046 Type *VNodeTypes[] = {
2047#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
2049 };
2050 auto *VNodeTy = StructType::get(Ctx, ArrayRef(VNodeTypes));
2051
2052 ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
2053 auto *VNodesVar = new GlobalVariable(
2054 M, VNodesTy, false, GlobalValue::PrivateLinkage,
2056 setGlobalVariableLargeSection(TT, *VNodesVar);
2057 VNodesVar->setSection(
2058 getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
2059 VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(VNodesTy));
2060 // VNodesVar is used by runtime but not referenced via relocation by other
2061 // sections. Conservatively make it linker retained.
2062 UsedVars.push_back(VNodesVar);
2063}
2064
2065// Build the per-TU device-PGO sections struct: section start/stop bounds for
2066// names/counters/data plus the raw version. Returns null if it already exists.
// The struct is named "__llvm_profile_sections" + CUIDPostfix, holds seven
// pointers in the default globals address space, is constant, and is given
// protected visibility.
2068 StringRef CUIDPostfix) {
2069 std::string Name = ("__llvm_profile_sections" + CUIDPostfix).str();
2070 if (M.getNamedValue(Name))
2071 return nullptr;
2072
2073 LLVMContext &Ctx = M.getContext();
2074 unsigned AS = M.getDataLayout().getDefaultGlobalsAddressSpace();
 // Extern returns the module's global named Sym, declaring it as an external
 // global without initializer if it does not exist yet. The visibility is
 // applied only to a newly created declaration.
2075 auto Extern = [&](StringRef Sym, Type *Ty, bool IsConst,
2077 GlobalVariable *GV = M.getNamedGlobal(Sym);
2078 if (!GV) {
2079 GV = new GlobalVariable(M, Ty, IsConst, GlobalValue::ExternalLinkage,
2080 nullptr, Sym, nullptr,
2082 GV->setVisibility(Vis);
2083 }
2084 return GV;
2085 };
2086 // Section bounds are hidden i8 markers; raw_version is an i64 constant.
2087 auto *I8 = Type::getInt8Ty(Ctx);
2088 auto Hidden = GlobalValue::HiddenVisibility;
2089 Constant *Fields[] = {Extern("__start___llvm_prf_names", I8, false, Hidden),
2090 Extern("__stop___llvm_prf_names", I8, false, Hidden),
2091 Extern("__start___llvm_prf_cnts", I8, false, Hidden),
2092 Extern("__stop___llvm_prf_cnts", I8, false, Hidden),
2093 Extern("__start___llvm_prf_data", I8, false, Hidden),
2094 Extern("__stop___llvm_prf_data", I8, false, Hidden),
2095 Extern("__llvm_profile_raw_version",
2096 Type::getInt64Ty(Ctx), true,
2098 auto *PtrTy = PointerType::get(Ctx, AS);
2099 auto *STy =
2100 StructType::get(Ctx, {PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy});
2101 auto *GV = new GlobalVariable(M, STy, /*isConstant=*/true,
2103 ConstantStruct::get(STy, Fields), Name, nullptr,
2105 GV->setVisibility(GlobalValue::ProtectedVisibility);
2106 return GV;
2107}
2108
// Emits the function-name blob for every name collected in ReferencedNames as
// the constant global NamesVar, records its size in NamesSize, and then erases
// the original name variables. Does nothing when ReferencedNames is empty.
// On GPU profiling targets a postfix string read from a module global is
// appended to the variable name, and the offload sections struct is emitted
// when at least one data variable exists.
2109void InstrLowerer::emitNameData() {
2110 if (ReferencedNames.empty())
2111 return;
2112
2113 std::string CompressedNameStr;
2114 if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
2116 report_fatal_error(Twine(toString(std::move(E))), false);
2117 }
2118
2119 auto &Ctx = M.getContext();
 // The final "false" argument suppresses the trailing NUL.
2120 auto *NamesVal =
2121 ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false);
2122 std::string NamesVarName = std::string(getInstrProfNamesVarName());
2125 std::string GPUCUIDPostfix;
2126 if (isGPUProfTarget(M)) {
2127 if (auto *GV = M.getNamedGlobal(getInstrProfNamesVarPostfixVarName())) {
2128 if (auto *Init =
2130 if (Init->isCString()) {
 // With a postfix, the names variable becomes an externally visible,
 // protected symbol, and the postfix global itself is erased.
2131 GPUCUIDPostfix = Init->getAsCString().str();
2132 NamesVarName += GPUCUIDPostfix;
2133 NamesLinkage = GlobalValue::ExternalLinkage;
2134 NamesVisibility = GlobalValue::ProtectedVisibility;
2136 M, [GV](Constant *C) { return C->stripPointerCasts() == GV; });
2137 GV->eraseFromParent();
2138 }
2139 }
2140 }
2141 }
2142 NamesVar = new GlobalVariable(M, NamesVal->getType(), true, NamesLinkage,
2143 NamesVal, NamesVarName);
2144 NamesVar->setVisibility(NamesVisibility);
2145
2146 NamesSize = CompressedNameStr.size();
2147 setGlobalVariableLargeSection(TT, *NamesVar);
2148 std::string NamesSectionName =
2150 ? getInstrProfSectionName(IPSK_covname, TT.getObjectFormat())
2151 : getInstrProfSectionName(IPSK_name, TT.getObjectFormat());
2152 NamesVar->setSection(NamesSectionName);
2153 // On COFF, it's important to reduce the alignment down to 1 to prevent the
2154 // linker from inserting padding before the start of the names section or
2155 // between names entries.
2156 NamesVar->setAlignment(Align(1));
2157 // NamesVar is used by runtime but not referenced via relocation by other
2158 // sections. Conservatively make it linker retained.
2159 UsedVars.push_back(NamesVar);
2160
 // The per-function name variables are no longer needed once the combined
 // blob exists.
2161 for (auto *NamePtr : ReferencedNames)
2162 NamePtr->eraseFromParent();
2163
2164 // Emit the device sections struct only when this TU produced profile data, so
2165 // its section start/stop references are backed by a real section.
2166 bool HasData = llvm::any_of(ProfileDataMap,
2167 [](const auto &KV) { return KV.second.DataVar; });
2168 if (!GPUCUIDPostfix.empty() && HasData)
2169 if (GlobalVariable *GV = emitGPUOffloadSectionsStruct(M, GPUCUIDPostfix))
2170 CompilerUsedVars.push_back(GV);
2171}
2172
// Emits the names of all referenced vtables as one private constant global in
// the IPSK_vname section, aligned to 1 byte, and appends it to UsedVars. Does
// nothing unless vtable value profiling is enabled and ReferencedVTables is
// non-empty. A failure while collecting the strings is fatal.
2173void InstrLowerer::emitVTableNames() {
2174 if (!EnableVTableValueProfiling || ReferencedVTables.empty())
2175 return;
2176
2177 // Collect the PGO names of referenced vtables and compress them.
2178 std::string CompressedVTableNames;
2179 if (Error E = collectVTableStrings(ReferencedVTables, CompressedVTableNames,
2181 report_fatal_error(Twine(toString(std::move(E))), false);
2182 }
2183
2184 auto &Ctx = M.getContext();
2185 auto *VTableNamesVal = ConstantDataArray::getString(
2186 Ctx, StringRef(CompressedVTableNames), false /* AddNull */);
2187 GlobalVariable *VTableNamesVar =
2188 new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */,
2189 GlobalValue::PrivateLinkage, VTableNamesVal,
2191 VTableNamesVar->setSection(
2192 getInstrProfSectionName(IPSK_vname, TT.getObjectFormat()));
2193 VTableNamesVar->setAlignment(Align(1));
2194 // Make VTableNames linker retained.
2195 UsedVars.push_back(VTableNamesVar);
2196}
2197
// Builds an internal void() function that registers the profiling globals with
// the runtime: one call per non-function entry of CompilerUsedVars and of
// UsedVars (NamesVar excluded), followed by a separate call that passes
// NamesVar together with NamesSize when NamesVar exists.
2198void InstrLowerer::emitRegistration() {
2200 return;
2201
2202 // Construct the function.
2203 auto *VoidTy = Type::getVoidTy(M.getContext());
2204 auto *VoidPtrTy = PointerType::getUnqual(M.getContext());
2205 auto *Int64Ty = Type::getInt64Ty(M.getContext());
2206 auto *RegisterFTy = FunctionType::get(VoidTy, false);
2207 auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
2209 RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2210 if (Options.NoRedZone)
2211 RegisterF->addFnAttr(Attribute::NoRedZone);
2212
 // Signature of the per-variable runtime callback: void(ptr).
2213 auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
2214 auto *RuntimeRegisterF =
2217
2218 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));
2219 for (Value *Data : CompilerUsedVars)
2220 if (!isa<Function>(Data))
2221 // Check for addrspace cast when profiling GPU
2222 IRB.CreateCall(RuntimeRegisterF,
2223 IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
2224 for (Value *Data : UsedVars)
2225 if (Data != NamesVar && !isa<Function>(Data))
2226 IRB.CreateCall(RuntimeRegisterF,
2227 IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
2228
 // The names blob is registered through its own callback: void(ptr, i64).
2229 if (NamesVar) {
2230 Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
2231 auto *NamesRegisterTy =
2232 FunctionType::get(VoidTy, ArrayRef(ParamTypes), false);
2233 auto *NamesRegisterF =
2236 IRB.CreateCall(NamesRegisterF, {IRB.CreatePointerBitCastOrAddrSpaceCast(
2237 NamesVar, VoidPtrTy),
2238 IRB.getInt64(NamesSize)});
2239 }
2240
2241 IRB.CreateRetVoid();
2242}
2243
// Emits a reference to the profiling runtime hook variable so the runtime gets
// pulled into the link. Returns false without emitting anything for GPU
// targets, for Linux and AIX, and when the module already defines or declares
// the hook variable; returns true otherwise.
2244bool InstrLowerer::emitRuntimeHook() {
2245 // GPU profiling data is read directly by the host offload runtime. We do not
2246 // need the standard runtime hook.
2247 if (TT.isGPU())
2248 return false;
2249
2250 // We expect the linker to be invoked with -u<hook_var> flag for Linux
2251 // in which case there is no need to emit the external variable.
2252 if (TT.isOSLinux() || TT.isOSAIX())
2253 return false;
2254
2255 // If the module's provided its own runtime, we don't need to do anything.
2256 if (M.getGlobalVariable(getInstrProfRuntimeHookVarName()))
2257 return false;
2258
2259 // Declare an external variable that will pull in the runtime initialization.
2260 auto *Int32Ty = Type::getInt32Ty(M.getContext());
2261 auto *Var =
2264 Var->setVisibility(GlobalValue::HiddenVisibility);
2265
 // On ELF (except PS targets) listing the variable as used is enough; on
 // other targets a small function that loads the variable is emitted.
2266 if (TT.isOSBinFormatELF() && !TT.isPS()) {
2267 // Mark the user variable as used so that it isn't stripped out.
2268 CompilerUsedVars.push_back(Var);
2269 } else {
2270 // Make a function that uses it.
2274 User->addFnAttr(Attribute::NoInline);
2275 if (Options.NoRedZone)
2276 User->addFnAttr(Attribute::NoRedZone);
2277 User->setVisibility(GlobalValue::HiddenVisibility);
2278 if (TT.supportsCOMDAT())
2279 User->setComdat(M.getOrInsertComdat(User->getName()));
2280 // Explicitly mark this function as cold since it is never called.
2281 User->setEntryCount(0);
2282
 // Body: load the i32 hook variable and return it.
2283 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", User));
2284 auto *Load = IRB.CreateLoad(Int32Ty, Var);
2285 IRB.CreateRet(Load);
2286
2287 // Mark the function as used so that it isn't stripped out.
2288 CompilerUsedVars.push_back(User);
2289 }
2290 return true;
2291}
2292
// Publishes the globals collected during lowering through the module's "used"
// lists. CompilerUsedVars goes to llvm.compiler.used on ELF, Mach-O, and on
// COFF when profile data is not referenced by code; otherwise it goes to
// llvm.used. UsedVars always goes to llvm.used.
2293void InstrLowerer::emitUses() {
2294 // The metadata sections are parallel arrays. Optimizers (e.g.
2295 // GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
2296 // we conservatively retain all unconditionally in the compiler.
2297 //
2298 // On ELF and Mach-O, the linker can guarantee the associated sections will be
2299 // retained or discarded as a unit, so llvm.compiler.used is sufficient.
2300 // Similarly on COFF, if prof data is not referenced by code we use one comdat
2301 // and ensure this GC property as well. Otherwise, we have to conservatively
2302 // make all of the sections retained by the linker.
2303 if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
2304 (TT.isOSBinFormatCOFF() && !DataReferencedByCode))
2305 appendToCompilerUsed(M, CompilerUsedVars);
2306 else
2307 appendToUsed(M, CompilerUsedVars);
2308
2309 // We do not add proper references from used metadata sections to NamesVar and
2310 // VNodesVar, so we have to be conservative and place them in llvm.used
2311 // regardless of the target.
2312 appendToUsed(M, UsedVars);
2313}
2314
// Creates the profile file name variable (non-CS lowering only) and, when the
// module contains the registration function, emits a no-inline initialization
// function that calls it and is appended to the global constructors with
// priority 0.
2315void InstrLowerer::emitInitialization() {
2316 // Create ProfileFileName variable. Don't do this for the
2317 // context-sensitive instrumentation lowering: This lowering is after
2318 // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
2319 // have already created the variable before LTO/ThinLTO linking.
2320 if (!IsCS)
2321 createProfileFileNameVar(M, Options.InstrProfileOutput);
 // Without a registration function there is nothing to call at startup.
2322 Function *RegisterF = M.getFunction(getInstrProfRegFuncsName());
2323 if (!RegisterF)
2324 return;
2325
2326 // Create the initialization function.
2327 auto *VoidTy = Type::getVoidTy(M.getContext());
2328 auto *F = Function::Create(FunctionType::get(VoidTy, false),
2331 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2332 F->addFnAttr(Attribute::NoInline);
2333 if (Options.NoRedZone)
2334 F->addFnAttr(Attribute::NoRedZone);
2335
2336 // Add the basic block and the necessary calls.
2337 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", F));
2338 IRB.CreateCall(RegisterF, {});
2339 IRB.CreateRetVoid();
2340
2341 appendToGlobalCtors(M, F, 0);
2342}
2343
2344namespace llvm {
2345// Create the variable for profile sampling.
// The variable is a zero-initialized, thread-local integer with default
// visibility: 16 bits wide when the sampled-instrumentation config requests a
// short counter, 32 bits otherwise. It is weak by default; on targets that
// support COMDAT it becomes external and is placed in a comdat named after the
// variable. It is added to llvm.compiler.used.
2348 IntegerType *SamplingVarTy;
2349 Constant *ValueZero;
2350 if (getSampledInstrumentationConfig().UseShort) {
2351 SamplingVarTy = Type::getInt16Ty(M.getContext());
2352 ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(16, 0));
2353 } else {
2354 SamplingVarTy = Type::getInt32Ty(M.getContext());
2355 ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(32, 0));
2356 }
2357 auto SamplingVar = new GlobalVariable(
2358 M, SamplingVarTy, false, GlobalValue::WeakAnyLinkage, ValueZero, VarName);
2359 SamplingVar->setVisibility(GlobalValue::DefaultVisibility);
2360 SamplingVar->setThreadLocal(true);
2361 Triple TT(M.getTargetTriple());
2362 if (TT.supportsCOMDAT()) {
2363 SamplingVar->setLinkage(GlobalValue::ExternalLinkage);
2364 SamplingVar->setComdat(M.getOrInsertComdat(VarName));
2365 }
2366 appendToCompilerUsed(M, SamplingVar);
2367}
2368} // namespace llvm
2369
2370// For GPU targets: Allocate contiguous arrays for all profile data.
2371// This solves the linker reordering problem by using ONE symbol per section
2372// type, so there's nothing for the linker to reorder.
// NOTE(review): the visible body below does not allocate anything; it only
// builds the profile data struct type and caches it. The comment above may
// belong to a different function; confirm against the upstream source.
//
// Returns the struct type of a profile data record, creating it on first use
// and caching it in ProfileDataTy. The field types come from expanding
// INSTR_PROF_DATA with the LLVM type of each field.
2373StructType *InstrLowerer::getProfileDataTy() {
2374 if (ProfileDataTy)
2375 return ProfileDataTy;
2376
2377 auto &Ctx = M.getContext();
2378 auto *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());
2379 auto *Int16Ty = Type::getInt16Ty(Ctx);
2380 auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
2381 Type *DataTypes[] = {
2382#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
2384 };
2385 ProfileDataTy = StructType::get(Ctx, ArrayRef(DataTypes));
2386 return ProfileDataTy;
2387}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_ABI
Definition Compiler.h:215
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static unsigned InstrCount
DXIL Finalize Linkage
@ Default
Hexagon Hardware Loops
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
#define INSTR_PROF_QUOTE(x)
#define INSTR_PROF_DATA_ALIGNMENT
#define INSTR_PROF_PROFILE_SET_TIMESTAMP
#define INSTR_PROF_PROFILE_SAMPLING_VAR
static bool shouldRecordVTableAddr(GlobalVariable *GV)
static bool shouldRecordFunctionAddr(Function *F)
static bool needsRuntimeHookUnconditionally(const Triple &TT)
static bool containsProfilingIntrinsics(Module &M)
Check if the module contains uses of any profiling intrinsics.
static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix, bool &Renamed)
Get the name of a profiling variable for a particular function.
#define INSTR_PROF_MIN_VAL_COUNTS
static Constant * getFuncAddrForProfData(Function *Fn)
static bool shouldUsePublicSymbol(Function *Fn)
static FunctionCallee getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI, ValueProfilingCallType CallType=ValueProfilingCallType::Default)
static Constant * getVTableAddrForProfData(GlobalVariable *GV)
static void doAtomicCheck(Function *F)
static GlobalVariable * emitGPUOffloadSectionsStruct(Module &M, StringRef CUIDPostfix)
static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT)
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
static LVOptions Options
Definition LVOptions.cpp:25
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
Memory SSA
Definition MemorySSA.cpp:72
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
if(PassOpts->AAPipeline)
SmallPtrSet< BasicBlock *, 0 > BlockSet
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
Class for arbitrary precision integers.
Definition APInt.h:78
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Annotations lets you mark points and ranges inside source code, for tests:
Definition Annotations.h:67
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
@ Add
*p = old + v
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
const Instruction & front() const
Definition BasicBlock.h:484
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Analysis providing branch probability information.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
@ NoDeduplicate
No deduplication is performed.
Definition Comdat.h:40
ConstantArray - Constant Array Declarations.
Definition Constants.h:590
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true, bool ByteString=false)
This method constructs a CDS and initializes it with a text string.
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constant.h:64
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
const BasicBlock & getEntryBlock() const
Definition Function.h:783
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
Definition Function.h:166
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:353
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition Globals.cpp:621
bool hasMetadata() const
Return true if this GlobalObject has any metadata attached to it.
LLVM_ABI void setComdat(Comdat *C)
Definition Globals.cpp:223
bool hasComdat() const
LLVM_ABI void setSection(StringRef S)
Change the section for this global.
Definition Globals.cpp:284
bool hasLinkOnceLinkage() const
VisibilityTypes getVisibility() const
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337
LinkageTypes getLinkage() const
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
void setLinkage(LinkageTypes LT)
bool isDeclarationForLinker() const
Module * getParent()
Get the module that this global value is contained inside of...
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition GlobalValue.h:67
@ DefaultVisibility
The GV is visible.
Definition GlobalValue.h:68
@ HiddenVisibility
The GV is hidden.
Definition GlobalValue.h:69
@ ProtectedVisibility
The GV is protected.
Definition GlobalValue.h:70
void setVisibility(VisibilityTypes V)
static bool isWeakForLinker(LinkageTypes Linkage)
Whether the definition of this global may be replaced at link time.
bool hasAvailableExternallyLinkage() const
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition GlobalValue.h:52
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition GlobalValue.h:61
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition GlobalValue.h:57
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition GlobalValue.h:56
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
Definition Globals.cpp:569
LLVM_ABI void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Globals.cpp:538
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2128
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2227
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1532
ConstantInt * getInt8(uint8_t C)
Get a constant 8-bit value.
Definition IRBuilder.h:467
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition IRBuilder.h:2081
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition IRBuilder.h:539
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2291
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2368
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:477
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1906
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1511
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1570
Value * CreateConstInBoundsGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0, unsigned Idx1, const Twine &Name="")
Definition IRBuilder.h:2035
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1919
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1422
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2222
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
Definition IRBuilder.h:2697
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2543
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2096
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:577
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:181
Value * CreateInBoundsPtrAdd(Value *Ptr, Value *Offset, const Twine &Name="")
Definition IRBuilder.h:2086
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1592
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, bool Elementwise=false)
Definition IRBuilder.h:1970
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2848
A base class for all instrprof counter intrinsics.
LLVM_ABI ConstantInt * getIndex() const
LLVM_ABI ConstantInt * getNumCounters() const
static LLVM_ABI const char * FunctionNameAttributeName
static LLVM_ABI const char * CFGHashAttributeName
static LLVM_ABI const char * NumCountersAttributeName
This represents the llvm.instrprof.cover intrinsic.
This represents the llvm.instrprof.increment intrinsic.
LLVM_ABI Value * getStep() const
A base class for all instrprof intrinsics.
GlobalVariable * getName() const
ConstantInt * getHash() const
A base class for instrprof mcdc intrinsics that require global bitmap bytes.
This represents the llvm.instrprof.mcdc.tvbitmap.update intrinsic.
ConstantInt * getBitmapIndex() const
This represents the llvm.instrprof.timestamp intrinsic.
This represents the llvm.instrprof.value.profile intrinsic.
ConstantInt * getIndex() const
ConstantInt * getValueKind() const
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater.
Definition SSAUpdater.h:149
An instruction for reading from memory.
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
LLVM_ABI MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards false destination.
Definition MDBuilder.cpp:48
Metadata node.
Definition Metadata.h:1069
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1561
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:479
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:282
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:308
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
LLVM_ABI const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
Definition Value.cpp:828
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
constexpr bool isAtomic(const T &...O)
Definition SIDefines.h:380
@ PD
PD - Prefix code for packed double precision vector floating point operations performed in the SSE re...
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
FunctionAddr NumBitmapBytes
Definition InstrProf.h:95
StringRef getInstrProfNameVarPrefix()
Return the name prefix of variables containing instrumented function names.
Definition InstrProf.h:131
StringRef getInstrProfRuntimeHookVarName()
Return the name of the hook variable defined in profile runtime library.
Definition InstrProf.h:206
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:328
LLVM_ABI void createProfileSamplingVar(Module &M)
StringRef getInstrProfBitmapVarPrefix()
Return the name prefix of profile bitmap variables.
Definition InstrProf.h:143
LLVM_ABI cl::opt< bool > DoInstrProfNameCompression
FuncHash
Definition InstrProf.h:78
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
FunctionAddr Int16ArrayTy
Definition InstrProf.h:93
FunctionAddr NumCounters
Definition InstrProf.h:91
StringRef getInstrProfVTableNamesVarName()
Definition InstrProf.h:159
StringRef getInstrProfDataVarPrefix()
Return the name prefix of variables containing per-function control data.
Definition InstrProf.h:137
StringRef getCoverageUnusedNamesVarName()
Return the name of the internal variable recording the array of PGO name vars referenced by the cover...
Definition InstrProf.h:172
LLVM_ABI std::string getInstrProfSectionName(InstrProfSectKind IPSK, Triple::ObjectFormatType OF, bool AddSegmentInfo=true)
Return the name of the profile section corresponding to IPSK.
LLVM_ABI bool needsComdatForCounter(const GlobalObject &GV, const Module &M)
Check if we can use Comdat for profile variables.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
StringRef getInstrProfInitFuncName()
Return the name of the runtime initialization method that is generated by the compiler.
Definition InstrProf.h:201
StringRef getInstrProfValuesVarPrefix()
Return the name prefix of value profile variables.
Definition InstrProf.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
StringRef getInstrProfCounterBiasVarName()
Definition InstrProf.h:216
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
StringRef getInstrProfRuntimeHookVarUseFuncName()
Return the name of the compiler generated function that references the runtime hook variable.
Definition InstrProf.h:212
StringRef getInstrProfRegFuncsName()
Return the name of the function that registers all the per-function control data at program startup time ...
Definition InstrProf.h:181
LLVM_ABI Error collectPGOFuncNameStrings(ArrayRef< GlobalVariable * > NameVars, std::string &Result, bool doCompression=true)
Produce Result string with the same format described above.
InstrProfSectKind
Definition InstrProf.h:91
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
StringRef getInstrProfCountersVarPrefix()
Return the name prefix of profile counter variables.
Definition InstrProf.h:140
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
inst_range instructions(Function *F)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar)
Return the initializer, as a string, of the PGO name var NameVar.
StringRef getInstrProfBitmapBiasVarName()
Definition InstrProf.h:220
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
StringRef getInstrProfValueProfMemOpFuncName()
Return the name of the profile runtime entry point to do memop size value profiling.
Definition InstrProf.h:118
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void removeFromUsedLists(Module &M, function_ref< bool(Constant *)> ShouldRemove)
Removes global values from the llvm.used and llvm.compiler.used arrays.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
StringRef getInstrProfNamesRegFuncName()
Return the name of the runtime interface that registers the PGO name strings.
Definition InstrProf.h:193
LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
@ Add
Sum of integers.
LLVM_ABI Error collectVTableStrings(ArrayRef< GlobalVariable * > VTables, std::string &Result, bool doCompression)
LLVM_ABI void setGlobalVariableLargeSection(const Triple &TargetTriple, GlobalVariable &GV)
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
LLVM_ABI bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
LLVM_ABI void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
StringRef getInstrProfNamesVarPostfixVarName()
Definition InstrProf.h:155
LLVM_ABI void appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Append F to the list of global ctors of module M with the given Priority.
LLVM_ABI bool isPresplitCoroSuspendExitEdge(const BasicBlock &Src, const BasicBlock &Dest)
Definition CFG.cpp:424
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
StringRef getInstrProfValueProfFuncName()
Return the name of the profile runtime entry point to do value profiling for a given site.
Definition InstrProf.h:112
llvm::cl::opt< llvm::InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate
StringRef getInstrProfRegFuncName()
Return the name of the runtime interface that registers per-function control data for one instrumente...
Definition InstrProf.h:187
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void appendToUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.used list.
StringRef getInstrProfNamesVarName()
Return the name of the variable holding the strings (possibly compressed) of all function's PGO names...
Definition InstrProf.h:153
LLVM_ABI bool isGPUProfTarget(const Module &M)
Determines whether module targets a GPU eligible for PGO instrumentation.
LLVM_ABI bool isIRPGOFlagSet(const Module *M)
Check if INSTR_PROF_RAW_VERSION_VAR is defined.
StringRef getInstrProfVNodesVarName()
Return the name of value profile node array variables:
Definition InstrProf.h:149
StringRef toStringRef(bool B)
Construct a string ref from a boolean.
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
@ Extern
Replace returns with jump to thunk, don't emit thunk.
Definition CodeGen.h:163
StringRef getInstrProfVTableVarPrefix()
Return the name prefix of variables containing virtual table profile data.
Definition InstrProf.h:134
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
#define NC
Definition regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.