LLVM  15.0.0git
GlobalSplit.cpp
Go to the documentation of this file.
1 //===- GlobalSplit.cpp - global variable splitter -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass uses inrange annotations on GEP indices to split globals where
10 // beneficial. Clang currently attaches these annotations to references to
11 // virtual table globals under the Itanium ABI for the benefit of the
12 // whole-program virtual call optimization and control flow integrity passes.
13 //
14 //===----------------------------------------------------------------------===//
15 
#include "llvm/Transforms/IPO/GlobalSplit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/IPO.h"
#include <cstdint>
#include <vector>
38 
39 using namespace llvm;
40 
41 static bool splitGlobal(GlobalVariable &GV) {
42  // If the address of the global is taken outside of the module, we cannot
43  // apply this transformation.
44  if (!GV.hasLocalLinkage())
45  return false;
46 
47  // We currently only know how to split ConstantStructs.
48  auto *Init = dyn_cast_or_null<ConstantStruct>(GV.getInitializer());
49  if (!Init)
50  return false;
51 
52  // Verify that each user of the global is an inrange getelementptr constant.
53  // From this it follows that any loads from or stores to that global must use
54  // a pointer derived from an inrange getelementptr constant, which is
55  // sufficient to allow us to apply the splitting transform.
56  for (User *U : GV.users()) {
57  if (!isa<Constant>(U))
58  return false;
59 
60  auto *GEP = dyn_cast<GEPOperator>(U);
61  if (!GEP || !GEP->getInRangeIndex() || *GEP->getInRangeIndex() != 1 ||
62  !isa<ConstantInt>(GEP->getOperand(1)) ||
63  !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
64  !isa<ConstantInt>(GEP->getOperand(2)))
65  return false;
66  }
67 
69  GV.getMetadata(LLVMContext::MD_type, Types);
70 
71  const DataLayout &DL = GV.getParent()->getDataLayout();
72  const StructLayout *SL = DL.getStructLayout(Init->getType());
73 
75 
76  std::vector<GlobalVariable *> SplitGlobals(Init->getNumOperands());
77  for (unsigned I = 0; I != Init->getNumOperands(); ++I) {
78  // Build a global representing this split piece.
79  auto *SplitGV =
80  new GlobalVariable(*GV.getParent(), Init->getOperand(I)->getType(),
82  Init->getOperand(I), GV.getName() + "." + utostr(I));
83  SplitGlobals[I] = SplitGV;
84 
85  unsigned SplitBegin = SL->getElementOffset(I);
86  unsigned SplitEnd = (I == Init->getNumOperands() - 1)
87  ? SL->getSizeInBytes()
88  : SL->getElementOffset(I + 1);
89 
90  // Rebuild type metadata, adjusting by the split offset.
91  // FIXME: See if we can use DW_OP_piece to preserve debug metadata here.
92  for (MDNode *Type : Types) {
93  uint64_t ByteOffset = cast<ConstantInt>(
94  cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
95  ->getZExtValue();
96  // Type metadata may be attached one byte after the end of the vtable, for
97  // classes without virtual methods in Itanium ABI. AFAIK, it is never
98  // attached to the first byte of a vtable. Subtract one to get the right
99  // slice.
100  // This is making an assumption that vtable groups are the only kinds of
101  // global variables that !type metadata can be attached to, and that they
102  // are either Itanium ABI vtable groups or contain a single vtable (i.e.
103  // Microsoft ABI vtables).
104  uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1;
105  if (AttachedTo < SplitBegin || AttachedTo >= SplitEnd)
106  continue;
107  SplitGV->addMetadata(
108  LLVMContext::MD_type,
109  *MDNode::get(GV.getContext(),
110  {ConstantAsMetadata::get(
111  ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)),
112  Type->getOperand(1)}));
113  }
114 
115  if (GV.hasMetadata(LLVMContext::MD_vcall_visibility))
116  SplitGV->setVCallVisibilityMetadata(GV.getVCallVisibility());
117  }
118 
119  for (User *U : GV.users()) {
120  auto *GEP = cast<GEPOperator>(U);
121  unsigned I = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
122  if (I >= SplitGlobals.size())
123  continue;
124 
126  Ops.push_back(ConstantInt::get(Int32Ty, 0));
127  for (unsigned I = 3; I != GEP->getNumOperands(); ++I)
128  Ops.push_back(GEP->getOperand(I));
129 
130  auto *NewGEP = ConstantExpr::getGetElementPtr(
131  SplitGlobals[I]->getInitializer()->getType(), SplitGlobals[I], Ops,
132  GEP->isInBounds());
133  GEP->replaceAllUsesWith(NewGEP);
134  }
135 
136  // Finally, remove the original global. Any remaining uses refer to invalid
137  // elements of the global, so replace with undef.
138  if (!GV.use_empty())
140  GV.eraseFromParent();
141  return true;
142 }
143 
144 static bool splitGlobals(Module &M) {
145  // First, see if the module uses either of the llvm.type.test or
146  // llvm.type.checked.load intrinsics, which indicates that splitting globals
147  // may be beneficial.
148  Function *TypeTestFunc =
149  M.getFunction(Intrinsic::getName(Intrinsic::type_test));
150  Function *TypeCheckedLoadFunc =
151  M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
152  if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
153  (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
154  return false;
155 
156  bool Changed = false;
157  for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals()))
158  Changed |= splitGlobal(GV);
159  return Changed;
160 }
161 
162 namespace {
163 
164 struct GlobalSplit : public ModulePass {
165  static char ID;
166 
167  GlobalSplit() : ModulePass(ID) {
169  }
170 
171  bool runOnModule(Module &M) override {
172  if (skipModule(M))
173  return false;
174 
175  return splitGlobals(M);
176  }
177 };
178 
179 } // end anonymous namespace
180 
181 char GlobalSplit::ID = 0;
182 
183 INITIALIZE_PASS(GlobalSplit, "globalsplit", "Global splitter", false, false)
184 
186  return new GlobalSplit;
187 }
188 
190  if (!splitGlobals(M))
191  return PreservedAnalyses::all();
192  return PreservedAnalyses::none();
193 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm::GlobalVariable::eraseFromParent
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Globals.cpp:428
llvm::GlobalSplitPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: GlobalSplit.cpp:189
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
Metadata.h
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:248
GlobalSplit.h
llvm::Function
Definition: Function.h:60
Pass.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:879
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::GlobalObject::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition: Metadata.cpp:1229
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Module.h
Operator.h
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1300
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1366
GlobalValue.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::User
Definition: User.h:44
Intrinsics.h
llvm::createGlobalSplitPass
ModulePass * createGlobalSplitPass()
This pass splits globals into pieces for the benefit of whole-program devirtualization and control-fl...
Definition: GlobalSplit.cpp:185
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1769
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:919
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
splitGlobals
static bool splitGlobals(Module &M)
Definition: GlobalSplit.cpp:144
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
uint64_t
llvm::StructLayout
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:622
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:577
IPO.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
StringExtras.h
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:608
llvm::GlobalValue::hasLocalLinkage
bool hasLocalLinkage() const
Definition: GlobalValue.h:450
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MDNode
Metadata node.
Definition: Metadata.h:937
DataLayout.h
llvm::StructLayout::getSizeInBytes
uint64_t getSizeInBytes() const
Definition: DataLayout.h:629
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:529
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::GlobalObject::hasMetadata
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:588
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::initializeGlobalSplitPass
void initializeGlobalSplitPass(PassRegistry &)
llvm::Init
Definition: Record.h:281
Constant.h
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::StructLayout::getElementOffset
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:652
GlobalVariable.h
Casting.h
Function.h
llvm::ConstantExpr::getGetElementPtr
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, Optional< unsigned > InRangeIndex=None, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1243
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
SmallVector.h
User.h
llvm::GlobalVariable::isConstant
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
Definition: GlobalVariable.h:152
llvm::GlobalObject::getVCallVisibility
VCallVisibility getVCallVisibility() const
Definition: Metadata.cpp:1558
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:270
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
LLVMContext.h
InitializePasses.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
splitGlobal
static bool splitGlobal(GlobalVariable &GV)
Definition: GlobalSplit.cpp:41
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37