LLVM  16.0.0git
InstrOrderFile.cpp
Go to the documentation of this file.
1 //===- InstrOrderFile.cpp ---- Late IR instrumentation for order file ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include <cstdint>
#include <fstream>
#include <mutex>
#include <sstream>
#include <string>
#include <system_error>
29 
30 using namespace llvm;
31 #define DEBUG_TYPE "instrorderfile"
32 
34  "orderfile-write-mapping", cl::init(""),
35  cl::desc(
36  "Dump functions and their MD5 hash to deobfuscate profile data"),
37  cl::Hidden);
38 
39 namespace {
40 
41 // We need a global bitmap to tell if a function is executed. We also
42 // need a global variable to save the order of functions. We can use a
43 // fixed-size buffer that saves the MD5 hash of the function. We need
44 // a global variable to save the index into the buffer.
45 
46 std::mutex MappingMutex;
47 
48 struct InstrOrderFile {
49 private:
50  GlobalVariable *OrderFileBuffer;
51  GlobalVariable *BufferIdx;
52  GlobalVariable *BitMap;
53  ArrayType *BufferTy;
54  ArrayType *MapTy;
55 
56 public:
57  InstrOrderFile() = default;
58 
59  void createOrderFileData(Module &M) {
60  LLVMContext &Ctx = M.getContext();
61  int NumFunctions = 0;
62  for (Function &F : M) {
63  if (!F.isDeclaration())
64  NumFunctions++;
65  }
66 
67  BufferTy =
68  ArrayType::get(Type::getInt64Ty(Ctx), INSTR_ORDER_FILE_BUFFER_SIZE);
69  Type *IdxTy = Type::getInt32Ty(Ctx);
70  MapTy = ArrayType::get(Type::getInt8Ty(Ctx), NumFunctions);
71 
72  // Create the global variables.
73  std::string SymbolName = INSTR_PROF_ORDERFILE_BUFFER_NAME_STR;
74  OrderFileBuffer = new GlobalVariable(M, BufferTy, false, GlobalValue::LinkOnceODRLinkage,
76  Triple TT = Triple(M.getTargetTriple());
77  OrderFileBuffer->setSection(
78  getInstrProfSectionName(IPSK_orderfile, TT.getObjectFormat()));
79 
80  std::string IndexName = INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME_STR;
81  BufferIdx = new GlobalVariable(M, IdxTy, false, GlobalValue::LinkOnceODRLinkage,
82  Constant::getNullValue(IdxTy), IndexName);
83 
84  std::string BitMapName = "bitmap_0";
85  BitMap = new GlobalVariable(M, MapTy, false, GlobalValue::PrivateLinkage,
86  Constant::getNullValue(MapTy), BitMapName);
87  }
88 
89  // Generate the code sequence in the entry block of each function to
90  // update the buffer.
91  void generateCodeSequence(Module &M, Function &F, int FuncId) {
92  if (!ClOrderFileWriteMapping.empty()) {
93  std::lock_guard<std::mutex> LogLock(MappingMutex);
94  std::error_code EC;
97  if (EC) {
98  report_fatal_error(Twine("Failed to open ") + ClOrderFileWriteMapping +
99  " to save mapping file for order file instrumentation\n");
100  } else {
101  std::stringstream stream;
102  stream << std::hex << MD5Hash(F.getName());
103  std::string singleLine = "MD5 " + stream.str() + " " +
104  std::string(F.getName()) + '\n';
105  OS << singleLine;
106  }
107  }
108 
109  BasicBlock *OrigEntry = &F.getEntryBlock();
110 
111  LLVMContext &Ctx = M.getContext();
113  IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
114 
115  // Create a new entry block for instrumentation. We will check the bitmap
116  // in this basic block.
117  BasicBlock *NewEntry =
118  BasicBlock::Create(M.getContext(), "order_file_entry", &F, OrigEntry);
119  IRBuilder<> entryB(NewEntry);
120  // Create a basic block for updating the circular buffer.
121  BasicBlock *UpdateOrderFileBB =
122  BasicBlock::Create(M.getContext(), "order_file_set", &F, OrigEntry);
123  IRBuilder<> updateB(UpdateOrderFileBB);
124 
125  // Check the bitmap, if it is already 1, do nothing.
126  // Otherwise, set the bit, grab the index, update the buffer.
127  Value *IdxFlags[] = {ConstantInt::get(Int32Ty, 0),
129  Value *MapAddr = entryB.CreateGEP(MapTy, BitMap, IdxFlags, "");
130  LoadInst *loadBitMap = entryB.CreateLoad(Int8Ty, MapAddr, "");
131  entryB.CreateStore(ConstantInt::get(Int8Ty, 1), MapAddr);
132  Value *IsNotExecuted =
133  entryB.CreateICmpEQ(loadBitMap, ConstantInt::get(Int8Ty, 0));
134  entryB.CreateCondBr(IsNotExecuted, UpdateOrderFileBB, OrigEntry);
135 
136  // Fill up UpdateOrderFileBB: grab the index, update the buffer!
137  Value *IdxVal = updateB.CreateAtomicRMW(
140  // We need to wrap around the index to fit it inside the buffer.
141  Value *WrappedIdx = updateB.CreateAnd(
142  IdxVal, ConstantInt::get(Int32Ty, INSTR_ORDER_FILE_BUFFER_MASK));
143  Value *BufferGEPIdx[] = {ConstantInt::get(Int32Ty, 0), WrappedIdx};
144  Value *BufferAddr =
145  updateB.CreateGEP(BufferTy, OrderFileBuffer, BufferGEPIdx, "");
146  updateB.CreateStore(ConstantInt::get(Type::getInt64Ty(Ctx), MD5Hash(F.getName())),
147  BufferAddr);
148  updateB.CreateBr(OrigEntry);
149  }
150 
151  bool run(Module &M) {
152  createOrderFileData(M);
153 
154  int FuncId = 0;
155  for (Function &F : M) {
156  if (F.isDeclaration())
157  continue;
158  generateCodeSequence(M, F, FuncId);
159  ++FuncId;
160  }
161 
162  return true;
163  }
164 
165 }; // End of InstrOrderFile struct
166 } // End anonymous namespace
167 
170  if (InstrOrderFile().run(M))
171  return PreservedAnalyses::none();
172  return PreservedAnalyses::all();
173 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
Instrumentation.h
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::ARM::PredBlockMask::TT
@ TT
FileSystem.h
llvm::Function
Definition: Function.h:60
Pass.h
llvm::IRBuilder<>
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:357
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
PassRegistry.h
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
CommandLine.h
llvm::InstrOrderFilePass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: InstrOrderFile.cpp:169
GlobalValue.h
ClOrderFileWriteMapping
static cl::opt< std::string > ClOrderFileWriteMapping("orderfile-write-mapping", cl::init(""), cl::desc("Dump functions and their MD5 hash to deobfuscate profile data"), cl::Hidden)
Constants.h
llvm::GlobalObject::setSection
void setSection(StringRef S)
Change the section for this global.
Definition: Globals.cpp:248
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
InstrProf.h
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::AtomicRMWInst::Add
@ Add
*p = old + v
Definition: Instructions.h:731
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:101
llvm::cl::opt
Definition: CommandLine.h:1400
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
FuncId
Profile::FuncID FuncId
Definition: Profile.cpp:321
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
IRBuilder.h
llvm::codeview::CompileSym2Flags::EC
@ EC
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:638
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:97
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::raw_fd_ostream
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:440
llvm::AMDGPU::HSAMD::Kernel::Key::SymbolName
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
Definition: AMDGPUMetadata.h:386
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
Function.h
llvm::getInstrProfSectionName
std::string getInstrProfSectionName(InstrProfSectKind IPSK, Triple::ObjectFormatType OF, bool AddSegmentInfo=true)
Return the name of the profile section corresponding to IPSK.
Definition: InstrProf.cpp:212
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
Instructions.h
llvm::sys::fs::OF_Append
@ OF_Append
The file should be opened in append mode.
Definition: FileSystem.h:773
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
InstrOrderFile.h
llvm::cl::desc
Definition: CommandLine.h:413
raw_ostream.h
llvm::GlobalValue::LinkOnceODRLinkage
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:51
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::MD5Hash
uint64_t MD5Hash(StringRef Str)
Helper to compute and return lower 64 bits of the given string's MD5 hash.
Definition: MD5.h:109