LLVM 20.0.0git
StructuralHash.cpp
Go to the documentation of this file.
1//===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/Hashing.h"
11#include "llvm/IR/Function.h"
13#include "llvm/IR/InstrTypes.h"
16#include "llvm/IR/Module.h"
17
18using namespace llvm;
19
20namespace {
21
22// Basic hashing mechanism to detect structural change to the IR, used to verify
23// pass return status consistency with actual change. In addition to being used
24// by the MergeFunctions pass.
25
26class StructuralHashImpl {
27 uint64_t Hash;
28
29 void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
30
31 // This will produce different values on 32-bit and 64-bit systens as
32 // hash_combine returns a size_t. However, this is only used for
33 // detailed hashing which, in-tree, only needs to distinguish between
34 // differences in functions.
35 template <typename T> void hashArbitaryType(const T &V) {
37 }
38
39 void hashType(Type *ValueType) {
40 hash(ValueType->getTypeID());
41 if (ValueType->isIntegerTy())
42 hash(ValueType->getIntegerBitWidth());
43 }
44
45public:
46 StructuralHashImpl() : Hash(4) {}
47
48 void updateOperand(Value *Operand) {
49 hashType(Operand->getType());
50
51 // The cases enumerated below are not exhaustive and are only aimed to
52 // get decent coverage over the function.
53 if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
54 hashArbitaryType(ConstInt->getValue());
55 } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
56 hashArbitaryType(ConstFP->getValue());
57 } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
58 hash(Arg->getArgNo());
59 } else if (Function *Func = dyn_cast<Function>(Operand)) {
60 // Hashing the name will be deterministic as LLVM's hashing infrastructure
61 // has explicit support for hashing strings and will not simply hash
62 // the pointer.
63 hashArbitaryType(Func->getName());
64 }
65 }
66
67 void updateInstruction(const Instruction &Inst, bool DetailedHash) {
68 hash(Inst.getOpcode());
69
70 if (!DetailedHash)
71 return;
72
73 hashType(Inst.getType());
74
75 // Handle additional properties of specific instructions that cause
76 // semantic differences in the IR.
77 if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
78 hash(ComparisonInstruction->getPredicate());
79
80 for (const auto &Op : Inst.operands())
82 }
83
84 // A function hash is calculated by considering only the number of arguments
85 // and whether a function is varargs, the order of basic blocks (given by the
86 // successors of each basic block in depth first order), and the order of
87 // opcodes of each instruction within each of these basic blocks. This mirrors
88 // the strategy FunctionComparator::compare() uses to compare functions by
89 // walking the BBs in depth first order and comparing each instruction in
90 // sequence. Because this hash currently does not look at the operands, it is
91 // insensitive to things such as the target of calls and the constants used in
92 // the function, which makes it useful when possibly merging functions which
93 // are the same modulo constants and call targets.
94 //
95 // Note that different users of StructuralHash will want different behavior
96 // out of it (i.e., MergeFunctions will want something different from PM
97 // expensive checks for pass modification status). When modifying this
98 // function, most changes should be gated behind an option and enabled
99 // selectively.
100 void update(const Function &F, bool DetailedHash) {
101 // Declarations don't affect analyses.
102 if (F.isDeclaration())
103 return;
104
105 hash(0x62642d6b6b2d6b72); // Function header
106
107 hash(F.isVarArg());
108 hash(F.arg_size());
109
112
113 // Walk the blocks in the same order as
114 // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
115 // function "structure." (BB and opcode sequence)
116 BBs.push_back(&F.getEntryBlock());
117 VisitedBBs.insert(BBs[0]);
118 while (!BBs.empty()) {
119 const BasicBlock *BB = BBs.pop_back_val();
120
121 // This random value acts as a block header, as otherwise the partition of
122 // opcodes into BBs wouldn't affect the hash, only the order of the
123 // opcodes
124 hash(45798);
125 for (auto &Inst : *BB)
126 updateInstruction(Inst, DetailedHash);
127
128 for (const BasicBlock *Succ : successors(BB))
129 if (VisitedBBs.insert(Succ).second)
130 BBs.push_back(Succ);
131 }
132 }
133
134 void update(const GlobalVariable &GV) {
135 // Declarations and used/compiler.used don't affect analyses.
136 // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
137 // we ignore anything with the `.llvm` prefix
138 if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
139 return;
140 hash(23456); // Global header
141 hash(GV.getValueType()->getTypeID());
142 }
143
144 void update(const Module &M, bool DetailedHash) {
145 for (const GlobalVariable &GV : M.globals())
146 update(GV);
147 for (const Function &F : M)
148 update(F, DetailedHash);
149 }
150
151 uint64_t getHash() const { return Hash; }
152};
153
154} // namespace
155
156IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
157 StructuralHashImpl H;
158 H.update(F, DetailedHash);
159 return H.getHash();
160}
161
162IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
163 StructuralHashImpl H;
164 H.update(M, DetailedHash);
165 return H.getHash();
166}
static bool updateOperand(Instruction *Inst, unsigned Idx, Instruction *Mat)
Updates the operand at Idx in instruction Inst with the result of instruction Mat.
#define F(x, y, z)
Definition: MD5.cpp:55
#define H(x, y, z)
Definition: MD5.cpp:57
Module.h This file contains the declarations for the Module class.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
This class represents an Operation in the Expression.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:290
Type * getValueType() const
Definition: GlobalValue.h:296
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
bool empty() const
Definition: SmallVector.h:95
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
op_range operands()
Definition: User.h:242
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
uint64_t hash_16_bytes(uint64_t low, uint64_t high)
Definition: Hashing.h:170
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto successors(const MachineBasicBlock *BB)
IRHash StructuralHash(const Function &F, bool DetailedHash=false)
Returns a hash of the function F.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:593