LLVM  9.0.0svn
GlobalOpt.cpp
Go to the documentation of this file.
//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken. If obviously true, it marks read/write globals as constant, deletes
// variables only stored to, etc.
//
//===----------------------------------------------------------------------===//

14 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/ADT/Twine.h"
30 #include "llvm/IR/Attributes.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/CallSite.h"
33 #include "llvm/IR/CallingConv.h"
34 #include "llvm/IR/Constant.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Dominators.h"
40 #include "llvm/IR/Function.h"
42 #include "llvm/IR/GlobalAlias.h"
43 #include "llvm/IR/GlobalValue.h"
44 #include "llvm/IR/GlobalVariable.h"
45 #include "llvm/IR/InstrTypes.h"
46 #include "llvm/IR/Instruction.h"
47 #include "llvm/IR/Instructions.h"
48 #include "llvm/IR/IntrinsicInst.h"
49 #include "llvm/IR/Module.h"
50 #include "llvm/IR/Operator.h"
51 #include "llvm/IR/Type.h"
52 #include "llvm/IR/Use.h"
53 #include "llvm/IR/User.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/IR/ValueHandle.h"
56 #include "llvm/Pass.h"
58 #include "llvm/Support/Casting.h"
60 #include "llvm/Support/Debug.h"
64 #include "llvm/Transforms/IPO.h"
68 #include <cassert>
69 #include <cstdint>
70 #include <utility>
71 #include <vector>
72 
73 using namespace llvm;
74 
75 #define DEBUG_TYPE "globalopt"
76 
77 STATISTIC(NumMarked , "Number of globals marked constant");
78 STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
79 STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
80 STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
81 STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
82 STATISTIC(NumDeleted , "Number of globals deleted");
83 STATISTIC(NumGlobUses , "Number of global uses devirtualized");
84 STATISTIC(NumLocalized , "Number of globals localized");
85 STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
86 STATISTIC(NumFastCallFns , "Number of functions converted to fastcc");
87 STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
88 STATISTIC(NumNestRemoved , "Number of nest attributes removed");
89 STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
90 STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
91 STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
92 STATISTIC(NumInternalFunc, "Number of internal functions");
93 STATISTIC(NumColdCC, "Number of functions marked coldcc");
94 
95 static cl::opt<bool>
96  EnableColdCCStressTest("enable-coldcc-stress-test",
97  cl::desc("Enable stress test of coldcc by adding "
98  "calling conv to all internal functions."),
99  cl::init(false), cl::Hidden);
100 
102  "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
103  cl::desc(
104  "Maximum block frequency, expressed as a percentage of caller's "
105  "entry frequency, for a call site to be considered cold for enabling"
106  "coldcc"));
107 
108 /// Is this global variable possibly used by a leak checker as a root? If so,
109 /// we might not really want to eliminate the stores to it.
111  // A global variable is a root if it is a pointer, or could plausibly contain
112  // a pointer. There are two challenges; one is that we could have a struct
113  // the has an inner member which is a pointer. We recurse through the type to
114  // detect these (up to a point). The other is that we may actually be a union
115  // of a pointer and another type, and so our LLVM type is an integer which
116  // gets converted into a pointer, or our type is an [i8 x #] with a pointer
117  // potentially contained here.
118 
119  if (GV->hasPrivateLinkage())
120  return false;
121 
123  Types.push_back(GV->getValueType());
124 
125  unsigned Limit = 20;
126  do {
127  Type *Ty = Types.pop_back_val();
128  switch (Ty->getTypeID()) {
129  default: break;
130  case Type::PointerTyID: return true;
131  case Type::ArrayTyID:
132  case Type::VectorTyID: {
133  SequentialType *STy = cast<SequentialType>(Ty);
134  Types.push_back(STy->getElementType());
135  break;
136  }
137  case Type::StructTyID: {
138  StructType *STy = cast<StructType>(Ty);
139  if (STy->isOpaque()) return true;
141  E = STy->element_end(); I != E; ++I) {
142  Type *InnerTy = *I;
143  if (isa<PointerType>(InnerTy)) return true;
144  if (isa<CompositeType>(InnerTy))
145  Types.push_back(InnerTy);
146  }
147  break;
148  }
149  }
150  if (--Limit == 0) return true;
151  } while (!Types.empty());
152  return false;
153 }
154 
155 /// Given a value that is stored to a global but never read, determine whether
156 /// it's safe to remove the store and the chain of computation that feeds the
157 /// store.
158 static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
159  do {
160  if (isa<Constant>(V))
161  return true;
162  if (!V->hasOneUse())
163  return false;
164  if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
165  isa<GlobalValue>(V))
166  return false;
167  if (isAllocationFn(V, TLI))
168  return true;
169 
170  Instruction *I = cast<Instruction>(V);
171  if (I->mayHaveSideEffects())
172  return false;
173  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
174  if (!GEP->hasAllConstantIndices())
175  return false;
176  } else if (I->getNumOperands() != 1) {
177  return false;
178  }
179 
180  V = I->getOperand(0);
181  } while (true);
182 }
183 
184 /// This GV is a pointer root. Loop over all users of the global and clean up
185 /// any that obviously don't assign the global a value that isn't dynamically
186 /// allocated.
188  const TargetLibraryInfo *TLI) {
189  // A brief explanation of leak checkers. The goal is to find bugs where
190  // pointers are forgotten, causing an accumulating growth in memory
191  // usage over time. The common strategy for leak checkers is to whitelist the
192  // memory pointed to by globals at exit. This is popular because it also
193  // solves another problem where the main thread of a C++ program may shut down
194  // before other threads that are still expecting to use those globals. To
195  // handle that case, we expect the program may create a singleton and never
196  // destroy it.
197 
198  bool Changed = false;
199 
200  // If Dead[n].first is the only use of a malloc result, we can delete its
201  // chain of computation and the store to the global in Dead[n].second.
203 
204  // Constants can't be pointers to dynamically allocated memory.
205  for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end();
206  UI != E;) {
207  User *U = *UI++;
208  if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
209  Value *V = SI->getValueOperand();
210  if (isa<Constant>(V)) {
211  Changed = true;
212  SI->eraseFromParent();
213  } else if (Instruction *I = dyn_cast<Instruction>(V)) {
214  if (I->hasOneUse())
215  Dead.push_back(std::make_pair(I, SI));
216  }
217  } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
218  if (isa<Constant>(MSI->getValue())) {
219  Changed = true;
220  MSI->eraseFromParent();
221  } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
222  if (I->hasOneUse())
223  Dead.push_back(std::make_pair(I, MSI));
224  }
225  } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
226  GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
227  if (MemSrc && MemSrc->isConstant()) {
228  Changed = true;
229  MTI->eraseFromParent();
230  } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) {
231  if (I->hasOneUse())
232  Dead.push_back(std::make_pair(I, MTI));
233  }
234  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
235  if (CE->use_empty()) {
236  CE->destroyConstant();
237  Changed = true;
238  }
239  } else if (Constant *C = dyn_cast<Constant>(U)) {
240  if (isSafeToDestroyConstant(C)) {
241  C->destroyConstant();
242  // This could have invalidated UI, start over from scratch.
243  Dead.clear();
244  CleanupPointerRootUsers(GV, TLI);
245  return true;
246  }
247  }
248  }
249 
250  for (int i = 0, e = Dead.size(); i != e; ++i) {
251  if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
252  Dead[i].second->eraseFromParent();
253  Instruction *I = Dead[i].first;
254  do {
255  if (isAllocationFn(I, TLI))
256  break;
258  if (!J)
259  break;
260  I->eraseFromParent();
261  I = J;
262  } while (true);
263  I->eraseFromParent();
264  }
265  }
266 
267  return Changed;
268 }
269 
270 /// We just marked GV constant. Loop over all users of the global, cleaning up
271 /// the obvious ones. This is largely just a quick scan over the use list to
272 /// clean up the easy and obvious cruft. This returns true if it made a change.
274  const DataLayout &DL,
275  TargetLibraryInfo *TLI) {
276  bool Changed = false;
277  // Note that we need to use a weak value handle for the worklist items. When
278  // we delete a constant array, we may also be holding pointer to one of its
279  // elements (or an element of one of its elements if we're dealing with an
280  // array of arrays) in the worklist.
282  while (!WorkList.empty()) {
283  Value *UV = WorkList.pop_back_val();
284  if (!UV)
285  continue;
286 
287  User *U = cast<User>(UV);
288 
289  if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
290  if (Init) {
291  // Replace the load with the initializer.
292  LI->replaceAllUsesWith(Init);
293  LI->eraseFromParent();
294  Changed = true;
295  }
296  } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
297  // Store must be unreachable or storing Init into the global.
298  SI->eraseFromParent();
299  Changed = true;
300  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
301  if (CE->getOpcode() == Instruction::GetElementPtr) {
302  Constant *SubInit = nullptr;
303  if (Init)
304  SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
305  Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI);
306  } else if ((CE->getOpcode() == Instruction::BitCast &&
307  CE->getType()->isPointerTy()) ||
308  CE->getOpcode() == Instruction::AddrSpaceCast) {
309  // Pointer cast, delete any stores and memsets to the global.
310  Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI);
311  }
312 
313  if (CE->use_empty()) {
314  CE->destroyConstant();
315  Changed = true;
316  }
317  } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
318  // Do not transform "gepinst (gep constexpr (GV))" here, because forming
319  // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
320  // and will invalidate our notion of what Init is.
321  Constant *SubInit = nullptr;
322  if (!isa<ConstantExpr>(GEP->getOperand(0))) {
323  ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
324  ConstantFoldInstruction(GEP, DL, TLI));
325  if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
326  SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
327 
328  // If the initializer is an all-null value and we have an inbounds GEP,
329  // we already know what the result of any load from that GEP is.
330  // TODO: Handle splats.
331  if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
332  SubInit = Constant::getNullValue(GEP->getResultElementType());
333  }
334  Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI);
335 
336  if (GEP->use_empty()) {
337  GEP->eraseFromParent();
338  Changed = true;
339  }
340  } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
341  if (MI->getRawDest() == V) {
342  MI->eraseFromParent();
343  Changed = true;
344  }
345 
346  } else if (Constant *C = dyn_cast<Constant>(U)) {
347  // If we have a chain of dead constantexprs or other things dangling from
348  // us, and if they are all dead, nuke them without remorse.
349  if (isSafeToDestroyConstant(C)) {
350  C->destroyConstant();
351  CleanupConstantGlobalUsers(V, Init, DL, TLI);
352  return true;
353  }
354  }
355  }
356  return Changed;
357 }
358 
359 static bool isSafeSROAElementUse(Value *V);
360 
361 /// Return true if the specified GEP is a safe user of a derived
362 /// expression from a global that we want to SROA.
363 static bool isSafeSROAGEP(User *U) {
364  // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
365  // don't like < 3 operand CE's, and we don't like non-constant integer
366  // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
367  // value of C.
368  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
369  !cast<Constant>(U->getOperand(1))->isNullValue())
370  return false;
371 
373  ++GEPI; // Skip over the pointer index.
374 
375  // For all other level we require that the indices are constant and inrange.
376  // In particular, consider: A[0][i]. We cannot know that the user isn't doing
377  // invalid things like allowing i to index an out-of-range subscript that
378  // accesses A[1]. This can also happen between different members of a struct
379  // in llvm IR.
380  for (; GEPI != E; ++GEPI) {
381  if (GEPI.isStruct())
382  continue;
383 
384  ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
385  if (!IdxVal || (GEPI.isBoundedSequential() &&
386  IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
387  return false;
388  }
389 
390  return llvm::all_of(U->users(),
391  [](User *UU) { return isSafeSROAElementUse(UU); });
392 }
393 
394 /// Return true if the specified instruction is a safe user of a derived
395 /// expression from a global that we want to SROA.
396 static bool isSafeSROAElementUse(Value *V) {
397  // We might have a dead and dangling constant hanging off of here.
398  if (Constant *C = dyn_cast<Constant>(V))
399  return isSafeToDestroyConstant(C);
400 
402  if (!I) return false;
403 
404  // Loads are ok.
405  if (isa<LoadInst>(I)) return true;
406 
407  // Stores *to* the pointer are ok.
408  if (StoreInst *SI = dyn_cast<StoreInst>(I))
409  return SI->getOperand(0) != V;
410 
411  // Otherwise, it must be a GEP. Check it and its users are safe to SRA.
412  return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I);
413 }
414 
415 /// Look at all uses of the global and decide whether it is safe for us to
416 /// perform this transformation.
418  for (User *U : GV->users()) {
419  // The user of the global must be a GEP Inst or a ConstantExpr GEP.
420  if (!isa<GetElementPtrInst>(U) &&
421  (!isa<ConstantExpr>(U) ||
422  cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
423  return false;
424 
425  // Check the gep and it's users are safe to SRA
426  if (!isSafeSROAGEP(U))
427  return false;
428  }
429 
430  return true;
431 }
432 
433 /// Copy over the debug info for a variable to its SRA replacements.
435  uint64_t FragmentOffsetInBits,
436  uint64_t FragmentSizeInBits,
437  unsigned NumElements) {
439  GV->getDebugInfo(GVs);
440  for (auto *GVE : GVs) {
441  DIVariable *Var = GVE->getVariable();
442  DIExpression *Expr = GVE->getExpression();
443  if (NumElements > 1) {
445  Expr, FragmentOffsetInBits, FragmentSizeInBits))
446  Expr = *E;
447  else
448  return;
449  }
450  auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
451  NGV->addDebugInfo(NGVE);
452  }
453 }
454 
455 /// Perform scalar replacement of aggregates on the specified global variable.
456 /// This opens the door for other optimizations by exposing the behavior of the
457 /// program in a more fine-grained way. We have determined that this
458 /// transformation is safe already. We return the first global variable we
459 /// insert so that the caller can reprocess it.
461  // Make sure this global only has simple uses that we can SRA.
462  if (!GlobalUsersSafeToSRA(GV))
463  return nullptr;
464 
465  assert(GV->hasLocalLinkage());
466  Constant *Init = GV->getInitializer();
467  Type *Ty = Init->getType();
468 
469  std::vector<GlobalVariable *> NewGlobals;
470  Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
471 
472  // Get the alignment of the global, either explicit or target-specific.
473  unsigned StartAlignment = GV->getAlignment();
474  if (StartAlignment == 0)
475  StartAlignment = DL.getABITypeAlignment(GV->getType());
476 
477  if (StructType *STy = dyn_cast<StructType>(Ty)) {
478  unsigned NumElements = STy->getNumElements();
479  NewGlobals.reserve(NumElements);
480  const StructLayout &Layout = *DL.getStructLayout(STy);
481  for (unsigned i = 0, e = NumElements; i != e; ++i) {
482  Constant *In = Init->getAggregateElement(i);
483  assert(In && "Couldn't get element of initializer?");
484  GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
486  In, GV->getName()+"."+Twine(i),
487  GV->getThreadLocalMode(),
488  GV->getType()->getAddressSpace());
490  NGV->copyAttributesFrom(GV);
491  Globals.push_back(NGV);
492  NewGlobals.push_back(NGV);
493 
494  // Calculate the known alignment of the field. If the original aggregate
495  // had 256 byte alignment for example, something might depend on that:
496  // propagate info to each field.
497  uint64_t FieldOffset = Layout.getElementOffset(i);
498  unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
499  if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i)))
500  NGV->setAlignment(NewAlign);
501 
502  // Copy over the debug info for the variable.
503  uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());
504  uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(i);
505  transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, NumElements);
506  }
507  } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
508  unsigned NumElements = STy->getNumElements();
509  if (NumElements > 16 && GV->hasNUsesOrMore(16))
510  return nullptr; // It's not worth it.
511  NewGlobals.reserve(NumElements);
512  auto ElTy = STy->getElementType();
513  uint64_t EltSize = DL.getTypeAllocSize(ElTy);
514  unsigned EltAlign = DL.getABITypeAlignment(ElTy);
515  uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);
516  for (unsigned i = 0, e = NumElements; i != e; ++i) {
517  Constant *In = Init->getAggregateElement(i);
518  assert(In && "Couldn't get element of initializer?");
519 
520  GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
522  In, GV->getName()+"."+Twine(i),
523  GV->getThreadLocalMode(),
524  GV->getType()->getAddressSpace());
526  NGV->copyAttributesFrom(GV);
527  Globals.push_back(NGV);
528  NewGlobals.push_back(NGV);
529 
530  // Calculate the known alignment of the field. If the original aggregate
531  // had 256 byte alignment for example, something might depend on that:
532  // propagate info to each field.
533  unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
534  if (NewAlign > EltAlign)
535  NGV->setAlignment(NewAlign);
536  transferSRADebugInfo(GV, NGV, FragmentSizeInBits * i, FragmentSizeInBits,
537  NumElements);
538  }
539  }
540 
541  if (NewGlobals.empty())
542  return nullptr;
543 
544  LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
545 
547 
548  // Loop over all of the uses of the global, replacing the constantexpr geps,
549  // with smaller constantexpr geps or direct references.
550  while (!GV->use_empty()) {
551  User *GEP = GV->user_back();
552  assert(((isa<ConstantExpr>(GEP) &&
553  cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
554  isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
555 
556  // Ignore the 1th operand, which has to be zero or else the program is quite
557  // broken (undefined). Get the 2nd operand, which is the structure or array
558  // index.
559  unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
560  if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
561 
562  Value *NewPtr = NewGlobals[Val];
563  Type *NewTy = NewGlobals[Val]->getValueType();
564 
565  // Form a shorter GEP if needed.
566  if (GEP->getNumOperands() > 3) {
567  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
569  Idxs.push_back(NullInt);
570  for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
571  Idxs.push_back(CE->getOperand(i));
572  NewPtr =
573  ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs);
574  } else {
575  GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
577  Idxs.push_back(NullInt);
578  for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
579  Idxs.push_back(GEPI->getOperand(i));
580  NewPtr = GetElementPtrInst::Create(
581  NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(Val), GEPI);
582  }
583  }
584  GEP->replaceAllUsesWith(NewPtr);
585 
586  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
587  GEPI->eraseFromParent();
588  else
589  cast<ConstantExpr>(GEP)->destroyConstant();
590  }
591 
592  // Delete the old global, now that it is dead.
593  Globals.erase(GV);
594  ++NumSRA;
595 
596  // Loop over the new globals array deleting any globals that are obviously
597  // dead. This can arise due to scalarization of a structure or an array that
598  // has elements that are dead.
599  unsigned FirstGlobal = 0;
600  for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
601  if (NewGlobals[i]->use_empty()) {
602  Globals.erase(NewGlobals[i]);
603  if (FirstGlobal == i) ++FirstGlobal;
604  }
605 
606  return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr;
607 }
608 
609 /// Return true if all users of the specified value will trap if the value is
610 /// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid
611 /// reprocessing them.
612 static bool AllUsesOfValueWillTrapIfNull(const Value *V,
614  for (const User *U : V->users()) {
615  if (const Instruction *I = dyn_cast<Instruction>(U)) {
616  // If null pointer is considered valid, then all uses are non-trapping.
617  // Non address-space 0 globals have already been pruned by the caller.
618  if (NullPointerIsDefined(I->getFunction()))
619  return false;
620  }
621  if (isa<LoadInst>(U)) {
622  // Will trap.
623  } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
624  if (SI->getOperand(0) == V) {
625  //cerr << "NONTRAPPING USE: " << *U;
626  return false; // Storing the value.
627  }
628  } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
629  if (CI->getCalledValue() != V) {
630  //cerr << "NONTRAPPING USE: " << *U;
631  return false; // Not calling the ptr
632  }
633  } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
634  if (II->getCalledValue() != V) {
635  //cerr << "NONTRAPPING USE: " << *U;
636  return false; // Not calling the ptr
637  }
638  } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
639  if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
640  } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
641  if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
642  } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
643  // If we've already seen this phi node, ignore it, it has already been
644  // checked.
645  if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
646  return false;
647  } else if (isa<ICmpInst>(U) &&
648  isa<ConstantPointerNull>(U->getOperand(1))) {
649  // Ignore icmp X, null
650  } else {
651  //cerr << "NONTRAPPING USE: " << *U;
652  return false;
653  }
654  }
655  return true;
656 }
657 
658 /// Return true if all uses of any loads from GV will trap if the loaded value
659 /// is null. Note that this also permits comparisons of the loaded value
660 /// against null, as a special case.
662  for (const User *U : GV->users())
663  if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
665  if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
666  return false;
667  } else if (isa<StoreInst>(U)) {
668  // Ignore stores to the global.
669  } else {
670  // We don't know or understand this user, bail out.
671  //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
672  return false;
673  }
674  return true;
675 }
676 
678  bool Changed = false;
679  for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
680  Instruction *I = cast<Instruction>(*UI++);
681  // Uses are non-trapping if null pointer is considered valid.
682  // Non address-space 0 globals are already pruned by the caller.
684  return false;
685  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
686  LI->setOperand(0, NewV);
687  Changed = true;
688  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
689  if (SI->getOperand(1) == V) {
690  SI->setOperand(1, NewV);
691  Changed = true;
692  }
693  } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
694  CallSite CS(I);
695  if (CS.getCalledValue() == V) {
696  // Calling through the pointer! Turn into a direct call, but be careful
697  // that the pointer is not also being passed as an argument.
698  CS.setCalledFunction(NewV);
699  Changed = true;
700  bool PassedAsArg = false;
701  for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
702  if (CS.getArgument(i) == V) {
703  PassedAsArg = true;
704  CS.setArgument(i, NewV);
705  }
706 
707  if (PassedAsArg) {
708  // Being passed as an argument also. Be careful to not invalidate UI!
709  UI = V->user_begin();
710  }
711  }
712  } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
713  Changed |= OptimizeAwayTrappingUsesOfValue(CI,
714  ConstantExpr::getCast(CI->getOpcode(),
715  NewV, CI->getType()));
716  if (CI->use_empty()) {
717  Changed = true;
718  CI->eraseFromParent();
719  }
720  } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
721  // Should handle GEP here.
723  Idxs.reserve(GEPI->getNumOperands()-1);
724  for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
725  i != e; ++i)
726  if (Constant *C = dyn_cast<Constant>(*i))
727  Idxs.push_back(C);
728  else
729  break;
730  if (Idxs.size() == GEPI->getNumOperands()-1)
732  GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
733  NewV, Idxs));
734  if (GEPI->use_empty()) {
735  Changed = true;
736  GEPI->eraseFromParent();
737  }
738  }
739  }
740 
741  return Changed;
742 }
743 
744 /// The specified global has only one non-null value stored into it. If there
745 /// are uses of the loaded value that would trap if the loaded value is
746 /// dynamically null, then we know that they cannot be reachable with a null
747 /// optimize away the load.
749  const DataLayout &DL,
750  TargetLibraryInfo *TLI) {
751  bool Changed = false;
752 
753  // Keep track of whether we are able to remove all the uses of the global
754  // other than the store that defines it.
755  bool AllNonStoreUsesGone = true;
756 
757  // Replace all uses of loads with uses of uses of the stored value.
758  for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E;){
759  User *GlobalUser = *GUI++;
760  if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
761  Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
762  // If we were able to delete all uses of the loads
763  if (LI->use_empty()) {
764  LI->eraseFromParent();
765  Changed = true;
766  } else {
767  AllNonStoreUsesGone = false;
768  }
769  } else if (isa<StoreInst>(GlobalUser)) {
770  // Ignore the store that stores "LV" to the global.
771  assert(GlobalUser->getOperand(1) == GV &&
772  "Must be storing *to* the global");
773  } else {
774  AllNonStoreUsesGone = false;
775 
776  // If we get here we could have other crazy uses that are transitively
777  // loaded.
778  assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
779  isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
780  isa<BitCastInst>(GlobalUser) ||
781  isa<GetElementPtrInst>(GlobalUser)) &&
782  "Only expect load and stores!");
783  }
784  }
785 
786  if (Changed) {
787  LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
788  << "\n");
789  ++NumGlobUses;
790  }
791 
792  // If we nuked all of the loads, then none of the stores are needed either,
793  // nor is the global.
794  if (AllNonStoreUsesGone) {
795  if (isLeakCheckerRoot(GV)) {
796  Changed |= CleanupPointerRootUsers(GV, TLI);
797  } else {
798  Changed = true;
799  CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
800  }
801  if (GV->use_empty()) {
802  LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
803  Changed = true;
804  GV->eraseFromParent();
805  ++NumDeleted;
806  }
807  }
808  return Changed;
809 }
810 
811 /// Walk the use list of V, constant folding all of the instructions that are
812 /// foldable.
813 static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
814  TargetLibraryInfo *TLI) {
815  for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
816  if (Instruction *I = dyn_cast<Instruction>(*UI++))
817  if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
818  I->replaceAllUsesWith(NewC);
819 
820  // Advance UI to the next non-I use to avoid invalidating it!
821  // Instructions could multiply use V.
822  while (UI != E && *UI == I)
823  ++UI;
824  if (isInstructionTriviallyDead(I, TLI))
825  I->eraseFromParent();
826  }
827 }
828 
829 /// This function takes the specified global variable, and transforms the
830 /// program as if it always contained the result of the specified malloc.
831 /// Because it is always the result of the specified malloc, there is no reason
/// to actually DO the malloc. Instead, turn the malloc into a global, and any
/// loads of GV as uses of the new global.
static GlobalVariable *
                              ConstantInt *NElements, const DataLayout &DL,
                              TargetLibraryInfo *TLI) {
  LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI
                    << '\n');

  // Pick the type of the replacement global: the allocated type itself for a
  // single-element malloc, or an array of it for a multi-element malloc.
  Type *GlobalType;
  if (NElements->getZExtValue() == 1)
    GlobalType = AllocTy;
  else
    // If we have an array allocation, the global variable is of an array.
    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());

  // Create the new global variable. The contents of the malloc'd memory is
  // undefined, so initialize with an undef value.
  GlobalVariable *NewGV = new GlobalVariable(
      *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
      UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
      GV->getThreadLocalMode());

  // If there are bitcast users of the malloc (which is typical, usually we have
  // a malloc + bitcast) then replace them with uses of the new global. Update
  // other users to use the global as well.
  BitCastInst *TheBC = nullptr;
  while (!CI->use_empty()) {
    Instruction *User = cast<Instruction>(CI->user_back());
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (BCI->getType() == NewGV->getType()) {
        // The bitcast produces exactly the global's type: it is a no-op now.
        BCI->replaceAllUsesWith(NewGV);
        BCI->eraseFromParent();
      } else {
        // Bitcast to some other type: just re-point it at the new global.
        BCI->setOperand(0, NewGV);
      }
    } else {
      // Any other user goes through a single shared bitcast of the global
      // back to the malloc's original result type.
      if (!TheBC)
        TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
      User->replaceUsesOfWith(CI, TheBC);
    }
  }

  // RepValue is what loads of GV will be replaced with; cast the global to
  // GV's value type if they differ.
  Constant *RepValue = NewGV;
  if (NewGV->getType() != GV->getValueType())
    RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType());

  // If there is a comparison against null, we will insert a global bool to
  // keep track of whether the global was initialized yet or not.
  GlobalVariable *InitBool =
    new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
                       GlobalValue::InternalLinkage,
                       GV->getName()+".init", GV->getThreadLocalMode());
  bool InitBoolUsed = false;

  // Loop over all uses of GV, processing them in turn.
  while (!GV->use_empty()) {
    if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
      // The global is initialized when the store to it occurs.
      new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
                    SI->getOrdering(), SI->getSyncScopeID(), SI);
      SI->eraseFromParent();
      continue;
    }

    LoadInst *LI = cast<LoadInst>(GV->user_back());
    while (!LI->use_empty()) {
      Use &LoadUse = *LI->use_begin();
      ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
      if (!ICI) {
        // Ordinary use of the loaded pointer: point it at the new global.
        LoadUse = RepValue;
        continue;
      }

      // Replace the cmp X, 0 with a use of the bool value.
      // Sink the load to where the compare was, if atomic rules allow us to.
      Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
                               InitBool->getName() + ".val", false, 0,
                               LI->getOrdering(), LI->getSyncScopeID(),
                               LI->isUnordered() ? (Instruction *)ICI : LI);
      InitBoolUsed = true;
      switch (ICI->getPredicate()) {
      default: llvm_unreachable("Unknown ICmp Predicate!");
      case ICmpInst::ICMP_ULT:
      case ICmpInst::ICMP_SLT:   // X < null -> always false
        LV = ConstantInt::getFalse(GV->getContext());
        break;
      case ICmpInst::ICMP_ULE:
      case ICmpInst::ICMP_SLE:
      case ICmpInst::ICMP_EQ:
        // X <= null / X == null -> "not yet initialized".
        LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
        break;
      case ICmpInst::ICMP_NE:
      case ICmpInst::ICMP_UGE:
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_UGT:
      case ICmpInst::ICMP_SGT:
        break;  // no change.
      }
      ICI->replaceAllUsesWith(LV);
      ICI->eraseFromParent();
    }
    LI->eraseFromParent();
  }

  // If the initialization boolean was used, insert it, otherwise delete it.
  if (!InitBoolUsed) {
    while (!InitBool->use_empty())  // Delete initializations
      cast<StoreInst>(InitBool->user_back())->eraseFromParent();
    delete InitBool;
  } else
    GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);

  // Now the GV is dead, nuke it and the malloc..
  GV->eraseFromParent();
  CI->eraseFromParent();

  // To further other optimizations, loop over all users of NewGV and try to
  // constant prop them. This will promote GEP instructions with constant
  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
  ConstantPropUsersOf(NewGV, DL, TLI);
  if (RepValue != NewGV)
    ConstantPropUsersOf(RepValue, DL, TLI);

  return NewGV;
}
959 
/// Scan the use-list of V checking to make sure that there are no complex uses
/// of V. We permit simple things like dereferencing the pointer, but not
/// storing through the address, unless it is to the specified global.
                                          const GlobalVariable *GV,
  for (const User *U : V->users()) {
    const Instruction *Inst = cast<Instruction>(U);

    // Loads and comparisons only read the pointer value; always safe.
    if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
      continue; // Fine, ignore.
    }

    if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
        return false;  // Storing the pointer itself... bad.
      continue;  // Otherwise, storing through it, or storing into GV... fine.
    }

    // Must index into the array and into the struct.
    if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
      // The GEP result is only acceptable if all of its uses are.
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
        return false;
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
      // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
      // cycles.
      if (PHIs.insert(PN).second)
        return false;
      continue;
    }

    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
      // Bitcasts are transparent; check the cast's uses instead.
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
        return false;
      continue;
    }

    // Anything else is a complex use we don't understand; reject it.
    return false;
  }
  return true;
}
1005 
/// The Alloc pointer is stored into GV somewhere. Transform all uses of the
/// allocation into loads from the global and uses of the resultant pointer.
/// Further, delete the store into GV. This assumes that these value pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
                                          GlobalVariable *GV) {
  while (!Alloc->use_empty()) {
    Instruction *U = cast<Instruction>(*Alloc->user_begin());
    // By default the replacement load is inserted right before the user.
    Instruction *InsertPt = U;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // If this is the store of the allocation into the global, remove it.
      if (SI->getOperand(1) == GV) {
        SI->eraseFromParent();
        continue;
      }
    } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
      // Insert the load in the corresponding predecessor, not right before the
      // PHI.
      InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator();
    } else if (isa<BitCastInst>(U)) {
      // Must be bitcast between the malloc and store to initialize the global.
      U->eraseFromParent();
      continue;
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      // If this is a "GEP bitcast" and the user is a store to the global, then
      // just process it as a bitcast.
      if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
        if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back()))
          if (SI->getOperand(1) == GV) {
            // Must be bitcast GEP between the malloc and store to initialize
            // the global.
            GEPI->eraseFromParent();
            continue;
          }
    }

    // Insert a load from the global, and use it instead of the malloc.
    Value *NL =
        new LoadInst(GV->getValueType(), GV, GV->getName() + ".val", InsertPt);
    U->replaceUsesOfWith(Alloc, NL);
  }
}
1050 
/// Verify that all uses of V (a load, or a phi of a load) are simple enough to
/// perform heap SRA on. This permits GEP's that index through the array and
/// struct field, icmps of null, and PHIs.
                        SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
                        SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
  // We permit two users of the load: setcc comparing against the null
  // pointer, and a getelementptr of a specific form.
  for (const User *U : V->users()) {
    const Instruction *UI = cast<Instruction>(U);

    // Comparison against null is ok.
    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) {
      // Only a direct compare with the null pointer constant is allowed.
      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
        return false;
      continue;
    }

    // getelementptr is also ok, but only a simple form.
    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
      // Must index into the array and into the struct.
      if (GEPI->getNumOperands() < 3)
        return false;

      // Otherwise the GEP is ok.
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
      if (!LoadUsingPHIsPerLoad.insert(PN).second)
        // This means some phi nodes are dependent on each other.
        // Avoid infinite looping!
        return false;
      if (!LoadUsingPHIs.insert(PN).second)
        // If we have already analyzed this PHI, then it is safe.
        continue;

      // Make sure all uses of the PHI are simple enough to transform.
                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
        return false;

      continue;
    }

    // Otherwise we don't know what this is, not ok.
    return false;
  }

  return true;
}
1102 
/// If all users of values loaded from GV are simple enough to perform HeapSRA,
/// return true.
                                                    Instruction *StoredVal) {
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
  // Check every load of GV. The per-load set is cleared between loads so that
  // PHI cycles are detected per load chain, while LoadUsingPHIs accumulates
  // every PHI seen for the cross-check below.
  for (const User *U : GV->users())
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
                                          LoadUsingPHIsPerLoad))
        return false;
      LoadUsingPHIsPerLoad.clear();
    }

  // If we reach here, we know that all uses of the loads and transitive uses
  // (through PHI nodes) are simple enough to transform. However, we don't know
  // that all inputs to the PHI nodes are in the same equivalence sets.
  // Check to verify that all operands of the PHIs are either PHIS that can be
  // transformed, loads from GV, or MI itself.
  for (const PHINode *PN : LoadUsingPHIs) {
    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
      Value *InVal = PN->getIncomingValue(op);

      // PHI of the stored value itself is ok.
      if (InVal == StoredVal) continue;

      if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
        // One of the PHIs in our set is (optimistically) ok.
        if (LoadUsingPHIs.count(InPN))
          continue;
        return false;
      }

      // Load from GV is ok.
      if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
        if (LI->getOperand(0) == GV)
          continue;

      // UNDEF? NULL?

      // Anything else is rejected.
      return false;
    }
  }

  return true;
}
1150 
1151 static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
1152  DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
1153  std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
1154  std::vector<Value *> &FieldVals = InsertedScalarizedValues[V];
1155 
1156  if (FieldNo >= FieldVals.size())
1157  FieldVals.resize(FieldNo+1);
1158 
1159  // If we already have this value, just reuse the previously scalarized
1160  // version.
1161  if (Value *FieldVal = FieldVals[FieldNo])
1162  return FieldVal;
1163 
1164  // Depending on what instruction this is, we have several cases.
1165  Value *Result;
1166  if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
1167  // This is a scalarized version of the load from the global. Just create
1168  // a new Load of the scalarized global.
1169  Value *V = GetHeapSROAValue(LI->getOperand(0), FieldNo,
1170  InsertedScalarizedValues, PHIsToRewrite);
1171  Result = new LoadInst(V->getType()->getPointerElementType(), V,
1172  LI->getName() + ".f" + Twine(FieldNo), LI);
1173  } else {
1174  PHINode *PN = cast<PHINode>(V);
1175  // PN's type is pointer to struct. Make a new PHI of pointer to struct
1176  // field.
1177 
1178  PointerType *PTy = cast<PointerType>(PN->getType());
1179  StructType *ST = cast<StructType>(PTy->getElementType());
1180 
1181  unsigned AS = PTy->getAddressSpace();
1182  PHINode *NewPN =
1183  PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS),
1184  PN->getNumIncomingValues(),
1185  PN->getName()+".f"+Twine(FieldNo), PN);
1186  Result = NewPN;
1187  PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
1188  }
1189 
1190  return FieldVals[FieldNo] = Result;
1191 }
1192 
/// Given a load instruction and a value derived from the load, rewrite the
/// derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
              DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
              std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
  // If this is a comparison against null, handle it.
  if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
    assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
    // If we have a setcc of the loaded pointer, we can use a setcc of any
    // field.
    Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
                                   InsertedScalarizedValues, PHIsToRewrite);

    Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
                              SCI->getName());
    SCI->replaceAllUsesWith(New);
    SCI->eraseFromParent();
    return;
  }

  // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
    assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
           && "Unexpected GEPI!");

    // Load the pointer for this field.
    unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
    Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
                                     InsertedScalarizedValues, PHIsToRewrite);

    // Create the new GEP idx vector: keep the leading array index, drop the
    // struct-field index (now selected by the per-field global), keep the rest.
    SmallVector<Value*, 8> GEPIdx;
    GEPIdx.push_back(GEPI->getOperand(1));
    GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());

    Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx,
                                             GEPI->getName(), GEPI);
    GEPI->replaceAllUsesWith(NGEPI);
    GEPI->eraseFromParent();
    return;
  }

  // Recursively transform the users of PHI nodes. This will lazily create the
  // PHIs that are needed for individual elements. Keep track of what PHIs we
  // see in InsertedScalarizedValues so that we don't get infinite loops (very
  // antisocial). If the PHI is already in InsertedScalarizedValues, it has
  // already been seen first by another load, so its uses have already been
  // processed.
  PHINode *PN = cast<PHINode>(LoadUser);
  if (!InsertedScalarizedValues.insert(std::make_pair(PN,
                                              std::vector<Value *>())).second)
    return;

  // If this is the first time we've seen this PHI, recursively process all
  // users.
  for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
    // Advance the iterator before rewriting: the rewrite may erase the user.
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }
}
1254 
/// We are performing Heap SRoA on a global. Ptr is a value loaded from the
/// global. Eliminate all uses of Ptr, making them use FieldGlobals instead.
/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA.
              DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
              std::vector<std::pair<PHINode *, unsigned> > &PHIsToRewrite) {
  // Rewrite each user of the load; advance the iterator first because the
  // rewrite can erase the user.
  for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }

  // If the load itself is now dead, delete it and drop its cache entry.
  if (Load->use_empty()) {
    Load->eraseFromParent();
    InsertedScalarizedValues.erase(Load);
  }
}
1271 
/// CI is an allocation of an array of structures. Break it up into multiple
/// allocations of arrays of the fields.
                                            Value *NElems, const DataLayout &DL,
                                            const TargetLibraryInfo *TLI) {
  LLVM_DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI
                    << '\n');
  Type *MAT = getMallocAllocatedType(CI, TLI);
  StructType *STy = cast<StructType>(MAT);

  // There is guaranteed to be at least one use of the malloc (storing
  // it into GV). If there are other uses, change them to be uses of
  // the global to simplify later code. This also deletes the store
  // into GV.

  // Okay, at this point, there are no users of the malloc. Insert N
  // new mallocs at the same place as CI, and N globals.
  std::vector<Value *> FieldGlobals;
  std::vector<Value *> FieldMallocs;

  CI->getOperandBundlesAsDefs(OpBundles);

  unsigned AS = GV->getType()->getPointerAddressSpace();
  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
    Type *FieldTy = STy->getElementType(FieldNo);
    PointerType *PFieldTy = PointerType::get(FieldTy, AS);

    // One internal, null-initialized global per struct field; it holds the
    // pointer returned by that field's malloc.
    GlobalVariable *NGV = new GlobalVariable(
        *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage,
        Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo),
        nullptr, GV->getThreadLocalMode());
    NGV->copyAttributesFrom(GV);
    FieldGlobals.push_back(NGV);

    unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
    if (StructType *ST = dyn_cast<StructType>(FieldTy))
      TypeSize = DL.getStructLayout(ST)->getSizeInBytes();
    Type *IntPtrTy = DL.getIntPtrType(CI->getType());
    Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
                                        ConstantInt::get(IntPtrTy, TypeSize),
                                        NElems, OpBundles, nullptr,
                                        CI->getName() + ".f" + Twine(FieldNo));
    FieldMallocs.push_back(NMI);
    new StoreInst(NMI, NGV, CI);
  }

  // The tricky aspect of this transformation is handling the case when malloc
  // fails. In the original code, malloc failing would set the result pointer
  // of malloc to null. In this case, some mallocs could succeed and others
  // could fail. As such, we emit code that looks like this:
  //    F0 = malloc(field0)
  //    F1 = malloc(field1)
  //    F2 = malloc(field2)
  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
  //      if (F0) { free(F0); F0 = 0; }
  //      if (F1) { free(F1); F1 = 0; }
  //      if (F2) { free(F2); F2 = 0; }
  //    }
  // The malloc can also fail if its argument is too large.
  Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
  Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
                                  ConstantZero, "isneg");
  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
    Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
                              Constant::getNullValue(FieldMallocs[i]->getType()),
                               "isnull");
    RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
  }

  // Split the basic block at the old malloc.
  BasicBlock *OrigBB = CI->getParent();
  BasicBlock *ContBB =
      OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont");

  // Create the block to check the first condition. Put all these blocks at the
  // end of the function as they are unlikely to be executed.
  BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
                                                "malloc_ret_null",
                                                OrigBB->getParent());

  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
  // branch on RunningOr.
  OrigBB->getTerminator()->eraseFromParent();
  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);

  // Within the NullPtrBlock, we need to emit a comparison and branch for each
  // pointer, because some may be null while others are not.
  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
    Value *GVVal =
        new LoadInst(cast<GlobalVariable>(FieldGlobals[i])->getValueType(),
                     FieldGlobals[i], "tmp", NullPtrBlock);
    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
                              Constant::getNullValue(GVVal->getType()));
    BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
                                               OrigBB->getParent());
    BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
                                               OrigBB->getParent());
    Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
                                         Cmp, NullPtrBlock);

    // Fill in FreeBlock.
    CallInst::CreateFree(GVVal, OpBundles, BI);
    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
                  FreeBlock);
    BranchInst::Create(NextBlock, FreeBlock);

    NullPtrBlock = NextBlock;
  }

  BranchInst::Create(ContBB, NullPtrBlock);

  // CI is no longer needed, remove it.
  CI->eraseFromParent();

  /// As we process loads, if we can't immediately update all uses of the load,
  /// keep track of what scalarized loads are inserted for a given load.
  DenseMap<Value *, std::vector<Value *>> InsertedScalarizedValues;
  InsertedScalarizedValues[GV] = FieldGlobals;

  std::vector<std::pair<PHINode *, unsigned>> PHIsToRewrite;

  // Okay, the malloc site is completely handled. All of the uses of GV are now
  // loads, and all uses of those loads are simple. Rewrite them to use loads
  // of the per-field globals instead.
  for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) {
    // Advance the iterator first: the rewrite below can erase the user.
    Instruction *User = cast<Instruction>(*UI++);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
      continue;
    }

    // Must be a store of null.
    StoreInst *SI = cast<StoreInst>(User);
    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
           "Unexpected heap-sra user!");

    // Insert a store of null into each global.
    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
      Type *ValTy = cast<GlobalValue>(FieldGlobals[i])->getValueType();
      Constant *Null = Constant::getNullValue(ValTy);
      new StoreInst(Null, FieldGlobals[i], SI);
    }
    // Erase the original store.
    SI->eraseFromParent();
  }

  // While we have PHIs that are interesting to rewrite, do it.
  while (!PHIsToRewrite.empty()) {
    PHINode *PN = PHIsToRewrite.back().first;
    unsigned FieldNo = PHIsToRewrite.back().second;
    PHIsToRewrite.pop_back();
    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");

    // Add all the incoming values. This can materialize more phis.
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      Value *InVal = PN->getIncomingValue(i);
      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
                               PHIsToRewrite);
      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
    }
  }

  // Drop all inter-phi links and any loads that made it this far.
  for (DenseMap<Value *, std::vector<Value *>>::iterator
       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
       I != E; ++I) {
    if (PHINode *PN = dyn_cast<PHINode>(I->first))
      PN->dropAllReferences();
    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
      LI->dropAllReferences();
  }

  // Delete all the phis and loads now that inter-references are dead.
  for (DenseMap<Value *, std::vector<Value *>>::iterator
       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
       I != E; ++I) {
    if (PHINode *PN = dyn_cast<PHINode>(I->first))
      PN->eraseFromParent();
    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
      LI->eraseFromParent();
  }

  // The old global is now dead, remove it.
  GV->eraseFromParent();

  ++NumHeapSRA;
  return cast<GlobalVariable>(FieldGlobals[0]);
}
1464 
/// This function is called when we see a pointer global variable with a single
/// value stored into it that is a malloc or cast of malloc.
                                                     Type *AllocTy,
                                                     AtomicOrdering Ordering,
                                                     const DataLayout &DL,
                                                     TargetLibraryInfo *TLI) {
  // If this is a malloc of an abstract type, don't touch it.
  if (!AllocTy->isSized())
    return false;

  // We can't optimize this global unless all uses of it are *known* to be
  // of the malloc value, not of the null initializer value (consider a use
  // that compares the global's value against zero to see if the malloc has
  // been reached). To do this, we check to see if all uses of the global
  // would trap if the global were null: this proves that they must all
  // happen after the malloc.
    return false;

  // We can't optimize this if the malloc itself is used in a complex way,
  // for example, being stored into multiple globals. This allows the
  // malloc to be stored into the specified global, loaded icmp'd, and
  // GEP'd. These are all things we could transform to using the global
  // for.
  if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
    return false;

  // If we have a global that is only initialized with a fixed size malloc,
  // transform the program to use global memory instead of malloc'd memory.
  // This eliminates dynamic allocation, avoids an indirection accessing the
  // data, and exposes the resultant global to further GlobalOpt.
  // We cannot optimize the malloc if we cannot determine malloc array size.
  Value *NElems = getMallocArraySize(CI, DL, TLI, true);
  if (!NElems)
    return false;

  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
    // Restrict this transformation to only working on small allocations
    // (2048 bytes currently), as we don't want to introduce a 16M global or
    // something.
    if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
      OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
      return true;
    }

  // If the allocation is an array of structures, consider transforming this
  // into multiple malloc'd arrays, one for each field. This is basically
  // SRoA for malloc'd memory.

  // Heap SRoA is only attempted for non-atomic accesses.
  if (Ordering != AtomicOrdering::NotAtomic)
    return false;

  // If this is an allocation of a fixed size array of structs, analyze as a
  // variable size array. malloc [100 x struct],1 -> malloc struct, 100
  if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
    if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
      AllocTy = AT->getElementType();

  StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
  if (!AllocSTy)
    return false;

  // Only proceed if the structure has a reasonable (1-16) number of fields;
  // otherwise, leave it alone.
  if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&

    // If this is a fixed size array, transform the Malloc to be an alloc of
    // structs. malloc [100 x struct],1 -> malloc struct, 100
    if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
      Type *IntPtrTy = DL.getIntPtrType(CI->getType());
      unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
      Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
      Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
      CI->getOperandBundlesAsDefs(OpBundles);
      Instruction *Malloc =
          CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements,
                                 OpBundles, nullptr, CI->getName());
      Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
      CI->replaceAllUsesWith(Cast);
      CI->eraseFromParent();
      // CreateMalloc may return either the malloc call itself or a bitcast of
      // it; recover the underlying CallInst in both cases.
      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
        CI = cast<CallInst>(BCI->getOperand(0));
      else
        CI = cast<CallInst>(Malloc);
    }

    PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
                         TLI);
    return true;
  }

  return false;
}
1562 
// Try to optimize globals based on the knowledge that only one value (besides
// its initializer) is ever stored to the global.
static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
                                     AtomicOrdering Ordering,
                                     const DataLayout &DL,
                                     TargetLibraryInfo *TLI) {
  // Ignore no-op GEPs and bitcasts.
  StoredOnceVal = StoredOnceVal->stripPointerCasts();

  // If we are dealing with a pointer global that is initialized to null and
  // only has one (non-null) value stored into it, then we can optimize any
  // users of the loaded value (often calls and loads) that would trap if the
  // value was null.
  if (GV->getInitializer()->getType()->isPointerTy() &&
      GV->getInitializer()->isNullValue() &&
      nullptr /* F */,
    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
      // The stored value is a constant: cast it to the initializer's type if
      // needed, then try to fold away trapping uses of loads of GV.
      if (GV->getInitializer()->getType() != SOVC->getType())
        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());

      // Optimize away any trapping uses of the loaded value.
      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI))
        return true;
    } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
      // The stored value is a malloc: try to replace the heap allocation
      // with global memory (or heap-SRoA it).
      Type *MallocType = getMallocAllocatedType(CI, TLI);
      if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
                                                           Ordering, DL, TLI))
        return true;
    }
  }

  return false;
}
1598 
1599 /// At this point, we have learned that the only two values ever stored into GV
1600 /// are its initializer and OtherVal. See if we can shrink the global into a
1601 /// boolean and select between the two values whenever it is used. This exposes
1602 /// the values to other scalar optimizations.
1604  Type *GVElType = GV->getValueType();
1605 
1606  // If GVElType is already i1, it is already shrunk. If the type of the GV is
1607  // an FP value, pointer or vector, don't do this optimization because a select
1608  // between them is very expensive and unlikely to lead to later
1609  // simplification. In these cases, we typically end up with "cond ? v1 : v2"
1610  // where v1 and v2 both require constant pool loads, a big loss.
1611  if (GVElType == Type::getInt1Ty(GV->getContext()) ||
1612  GVElType->isFloatingPointTy() ||
1613  GVElType->isPointerTy() || GVElType->isVectorTy())
1614  return false;
1615 
1616  // Walk the use list of the global seeing if all the uses are load or store.
1617  // If there is anything else, bail out.
1618  for (User *U : GV->users())
1619  if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
1620  return false;
1621 
1622  LLVM_DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n");
1623 
1624  // Create the new global, initializing it to false.
1626  false,
1629  GV->getName()+".b",
1630  GV->getThreadLocalMode(),
1631  GV->getType()->getAddressSpace());
1632  NewGV->copyAttributesFrom(GV);
1633  GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);
1634 
1635  Constant *InitVal = GV->getInitializer();
1636  assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
1637  "No reason to shrink to bool!");
1638 
1640  GV->getDebugInfo(GVs);
1641 
1642  // If initialized to zero and storing one into the global, we can use a cast
1643  // instead of a select to synthesize the desired value.
1644  bool IsOneZero = false;
1645  bool EmitOneOrZero = true;
1646  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)){
1647  IsOneZero = InitVal->isNullValue() && CI->isOne();
1648 
1649  if (ConstantInt *CIInit = dyn_cast<ConstantInt>(GV->getInitializer())){
1650  uint64_t ValInit = CIInit->getZExtValue();
1651  uint64_t ValOther = CI->getZExtValue();
1652  uint64_t ValMinus = ValOther - ValInit;
1653 
1654  for(auto *GVe : GVs){
1655  DIGlobalVariable *DGV = GVe->getVariable();
1656  DIExpression *E = GVe->getExpression();
1657 
1658  // It is expected that the address of global optimized variable is on
1659  // top of the stack. After optimization, value of that variable will
 1660  // be either 0 for the initial value or 1 for the other value. The following
1661  // expression should return constant integer value depending on the
1662  // value at global object address:
1663  // val * (ValOther - ValInit) + ValInit:
1664  // DW_OP_deref DW_OP_constu <ValMinus>
1665  // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
1667  dwarf::DW_OP_deref, dwarf::DW_OP_constu, ValMinus,
1668  dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
1669  dwarf::DW_OP_plus};
1673  NewGV->addDebugInfo(DGVE);
1674  }
1675  EmitOneOrZero = false;
1676  }
1677  }
1678 
1679  if (EmitOneOrZero) {
1680  // FIXME: This will only emit address for debugger on which will
1681  // be written only 0 or 1.
1682  for(auto *GV : GVs)
1683  NewGV->addDebugInfo(GV);
1684  }
1685 
1686  while (!GV->use_empty()) {
1687  Instruction *UI = cast<Instruction>(GV->user_back());
1688  if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
1689  // Change the store into a boolean store.
1690  bool StoringOther = SI->getOperand(0) == OtherVal;
1691  // Only do this if we weren't storing a loaded value.
1692  Value *StoreVal;
1693  if (StoringOther || SI->getOperand(0) == InitVal) {
1694  StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
1695  StoringOther);
1696  } else {
1697  // Otherwise, we are storing a previously loaded copy. To do this,
1698  // change the copy from copying the original value to just copying the
1699  // bool.
1700  Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
1701 
1702  // If we've already replaced the input, StoredVal will be a cast or
1703  // select instruction. If not, it will be a load of the original
1704  // global.
1705  if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
1706  assert(LI->getOperand(0) == GV && "Not a copy!");
1707  // Insert a new load, to preserve the saved value.
1708  StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
1709  LI->getName() + ".b", false, 0,
1710  LI->getOrdering(), LI->getSyncScopeID(), LI);
1711  } else {
1712  assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
1713  "This is not a form that we understand!");
1714  StoreVal = StoredVal->getOperand(0);
1715  assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
1716  }
1717  }
1718  StoreInst *NSI =
1719  new StoreInst(StoreVal, NewGV, false, 0, SI->getOrdering(),
1720  SI->getSyncScopeID(), SI);
1721  NSI->setDebugLoc(SI->getDebugLoc());
1722  } else {
1723  // Change the load into a load of bool then a select.
1724  LoadInst *LI = cast<LoadInst>(UI);
1725  LoadInst *NLI =
1726  new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b",
1727  false, 0, LI->getOrdering(), LI->getSyncScopeID(), LI);
1728  Instruction *NSI;
1729  if (IsOneZero)
1730  NSI = new ZExtInst(NLI, LI->getType(), "", LI);
1731  else
1732  NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
1733  NSI->takeName(LI);
1734  // Since LI is split into two instructions, NLI and NSI both inherit the
1735  // same DebugLoc
1736  NLI->setDebugLoc(LI->getDebugLoc());
1737  NSI->setDebugLoc(LI->getDebugLoc());
1738  LI->replaceAllUsesWith(NSI);
1739  }
1740  UI->eraseFromParent();
1741  }
1742 
1743  // Retain the name of the old global variable. People who are debugging their
1744  // programs may expect these variables to be named the same.
1745  NewGV->takeName(GV);
1746  GV->eraseFromParent();
1747  return true;
1748 }
1749 
1750 static bool deleteIfDead(
1751  GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
// NOTE(review): one line (original 1752) was lost in extraction here; in the
// upstream source it strips dead constant users before the checks -- confirm.
1753 
// Only globals the linker may drop (or mere declarations) are candidates.
1754  if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
1755  return false;
1756 
// A non-local member of a comdat that must be kept pins this global too.
1757  if (const Comdat *C = GV.getComdat())
1758  if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
1759  return false;
1760 
// Functions get the stronger "trivially dead" test; other globals are dead
// when they simply have no uses.
1761  bool Dead;
1762  if (auto *F = dyn_cast<Function>(&GV))
1763  Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
1764  else
1765  Dead = GV.use_empty();
1766  if (!Dead)
1767  return false;
1768 
1769  LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
1770  GV.eraseFromParent();
1771  ++NumDeleted;
1772  return true;
1773 }
1774 
// NOTE(review): the extraction dropped original line 1775, which carries this
// function's name (upstream: "static bool isPointerValueDeadOnEntryToFunction(")
// -- confirm against the full source before editing.
1776  const Function *F, GlobalValue *GV,
1777  function_ref<DominatorTree &(Function &)> LookupDomTree) {
1778  // Find all uses of GV. We expect them all to be in F, and if we can't
1779  // identify any of the uses we bail out.
1780  //
1781  // On each of these uses, identify if the memory that GV points to is
1782  // used/required/live at the start of the function. If it is not, for example
1783  // if the first thing the function does is store to the GV, the GV can
1784  // possibly be demoted.
1785  //
1786  // We don't do an exhaustive search for memory operations - simply look
1787  // through bitcasts as they're quite common and benign.
1788  const DataLayout &DL = GV->getParent()->getDataLayout();
// NOTE(review): original lines 1789-1790 (the declarations of the Loads and
// Stores vectors populated below) were lost in extraction.
1791  for (auto *U : GV->users()) {
1792  if (Operator::getOpcode(U) == Instruction::BitCast) {
// A bitcast's users must themselves be plain loads or stores, else give up.
1793  for (auto *UU : U->users()) {
1794  if (auto *LI = dyn_cast<LoadInst>(UU))
1795  Loads.push_back(LI);
1796  else if (auto *SI = dyn_cast<StoreInst>(UU))
1797  Stores.push_back(SI);
1798  else
1799  return false;
1800  }
1801  continue;
1802  }
1803 
// NOTE(review): original line 1804 (deriving an Instruction *I from the use
// U) was lost in extraction.
1805  if (!I)
1806  return false;
1807  assert(I->getParent()->getParent() == F);
1808 
1809  if (auto *LI = dyn_cast<LoadInst>(I))
1810  Loads.push_back(LI);
1811  else if (auto *SI = dyn_cast<StoreInst>(I))
1812  Stores.push_back(SI);
1813  else
1814  return false;
1815  }
1816 
1817  // We have identified all uses of GV into loads and stores. Now check if all
1818  // of them are known not to depend on the value of the global at the function
1819  // entry point. We do this by ensuring that every load is dominated by at
1820  // least one store.
1821  auto &DT = LookupDomTree(*const_cast<Function *>(F));
1822 
1823  // The below check is quadratic. Check we're not going to do too many tests.
1824  // FIXME: Even though this will always have worst-case quadratic time, we
1825  // could put effort into minimizing the average time by putting stores that
1826  // have been shown to dominate at least one load at the beginning of the
1827  // Stores array, making subsequent dominance checks more likely to succeed
1828  // early.
1829  //
1830  // The threshold here is fairly large because global->local demotion is a
1831  // very powerful optimization should it fire.
1832  const unsigned Threshold = 100;
1833  if (Loads.size() * Stores.size() > Threshold)
1834  return false;
1835 
1836  for (auto *L : Loads) {
1837  auto *LTy = L->getType();
1838  if (none_of(Stores, [&](const StoreInst *S) {
1839  auto *STy = S->getValueOperand()->getType();
1840  // The load is only dominated by the store if DomTree says so
1841  // and the number of bits loaded in L is less than or equal to
1842  // the number of bits stored in S.
1843  return DT.dominates(S, L) &&
1844  DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy);
1845  }))
1846  return false;
1847  }
1848  // All loads have known dependences inside F, so the global can be localized.
1849  return true;
1850 }
1851 
1852 /// C may have non-instruction users. Can all of those users be turned into
1853 /// instructions?
// NOTE(review): original line 1854, the signature (upstream: "static bool
// allNonInstructionUsersCanBeMadeInstructions(Constant *C) {"), was lost in
// extraction.
1855  // We don't do this exhaustively. The most common pattern that we really need
1856  // to care about is a constant GEP or constant bitcast - so just looking
1857  // through one single ConstantExpr.
1858  //
1859  // The set of constants that this function returns true for must be able to be
1860  // handled by makeAllConstantUsesInstructions.
1861  for (auto *U : C->users()) {
1862  if (isa<Instruction>(U))
1863  continue;
1864  if (!isa<ConstantExpr>(U))
1865  // Non instruction, non-constantexpr user; cannot convert this.
1866  return false;
// Look only one level deep: the ConstantExpr's own users must all be
// instructions already.
1867  for (auto *UU : U->users())
1868  if (!isa<Instruction>(UU))
1869  // A constantexpr used by another constant. We don't try and recurse any
1870  // further but just bail out at this point.
1871  return false;
1872  }
1873 
1874  return true;
1875 }
1876 
1877 /// C may have non-instruction users, and
1878 /// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
1879 /// non-instruction users to instructions.
// NOTE(review): original lines 1880-1881 (the signature, upstream "static void
// makeAllConstantUsesInstructions(Constant *C) {", and the declaration of the
// Users vector) were lost in extraction.
1882  for (auto *U : C->users()) {
1883  if (isa<ConstantExpr>(U))
1884  Users.push_back(cast<ConstantExpr>(U));
1885  else
1886  // We should never get here; allNonInstructionUsersCanBeMadeInstructions
1887  // should not have returned true for C.
1888  assert(
1889  isa<Instruction>(U) &&
1890  "Can't transform non-constantexpr non-instruction to instruction!");
1891  }
1892 
1893  SmallVector<Value*,4> UUsers;
1894  for (auto *U : Users) {
// Snapshot the users first: replaceUsesOfWith below mutates the use list.
1895  UUsers.clear();
1896  for (auto *UU : U->users())
1897  UUsers.push_back(UU);
1898  for (auto *UU : UUsers) {
1899  Instruction *UI = cast<Instruction>(UU);
1900  Instruction *NewU = U->getAsInstruction();
1901  NewU->insertBefore(UI);
1902  UI->replaceUsesOfWith(U, NewU);
1903  }
1904  // We've replaced all the uses, so destroy the constant. (destroyConstant
1905  // will update value handles and metadata.)
1906  U->destroyConstant();
1907  }
1908 }
1909 
1910 /// Analyze the specified global variable and optimize
1911 /// it if possible. If we make a change, return true.
// NOTE(review): several lines were lost in extraction (the signature at 1912,
// parts of the localization condition at 1924 and 1929-1931, a call at 1945,
// and the guarding "if" headers at 1977 and 2001) -- verify against the full
// upstream source before editing this function.
1913  GlobalVariable *GV, const GlobalStatus &GS, TargetLibraryInfo *TLI,
1914  function_ref<DominatorTree &(Function &)> LookupDomTree) {
1915  auto &DL = GV->getParent()->getDataLayout();
1916  // If this is a first class global and has only one accessing function and
1917  // this function is non-recursive, we replace the global with a local alloca
1918  // in this function.
1919  //
1920  // NOTE: It doesn't make sense to promote non-single-value types since we
1921  // are just replacing static memory to stack memory.
1922  //
1923  // If the global is in different address space, don't bring it to stack.
1925  GS.AccessingFunction &&
1926  GV->getValueType()->isSingleValueType() &&
1927  GV->getType()->getAddressSpace() == 0 &&
1928  !GV->isExternallyInitialized() &&
1932  LookupDomTree)) {
1933  const DataLayout &DL = GV->getParent()->getDataLayout();
1934 
1935  LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
// Insert the replacement alloca (and an initializing store, if needed) at
// the very top of the single accessing function.
1936  Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
1937  ->getEntryBlock().begin());
1938  Type *ElemTy = GV->getValueType();
1939  // FIXME: Pass Global's alignment when globals have alignment
1940  AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
1941  GV->getName(), &FirstI);
1942  if (!isa<UndefValue>(GV->getInitializer()))
1943  new StoreInst(GV->getInitializer(), Alloca, &FirstI);
1944 
1946 
1947  GV->replaceAllUsesWith(Alloca);
1948  GV->eraseFromParent();
1949  ++NumLocalized;
1950  return true;
1951  }
1952 
1953  // If the global is never loaded (but may be stored to), it is dead.
1954  // Delete it now.
1955  if (!GS.IsLoaded) {
1956  LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
1957 
1958  bool Changed;
1959  if (isLeakCheckerRoot(GV)) {
1960  // Delete any constant stores to the global.
1961  Changed = CleanupPointerRootUsers(GV, TLI);
1962  } else {
1963  // Delete any stores we can find to the global. We may not be able to
1964  // make it completely dead though.
1965  Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
1966  }
1967 
1968  // If the global is dead now, delete it.
1969  if (GV->use_empty()) {
1970  GV->eraseFromParent();
1971  ++NumDeleted;
1972  Changed = true;
1973  }
1974  return Changed;
1975 
1976  }
// NOTE(review): the "if" guarding the block below (original line 1977, which
// upstream tests that only the initializer was ever stored) was lost in
// extraction; without it the braces below do not balance as shown.
1978  LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
1979  GV->setConstant(true);
1980 
1981  // Clean up any obviously simplifiable users now.
1982  CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
1983 
1984  // If the global is dead now, just nuke it.
1985  if (GV->use_empty()) {
1986  LLVM_DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
1987  << "all users and delete global!\n");
1988  GV->eraseFromParent();
1989  ++NumDeleted;
1990  return true;
1991  }
1992 
1993  // Fall through to the next check; see if we can optimize further.
1994  ++NumMarked;
1995  }
1996  if (!GV->getInitializer()->getType()->isSingleValueType()) {
1997  const DataLayout &DL = GV->getParent()->getDataLayout();
1998  if (SRAGlobal(GV, DL))
1999  return true;
2000  }
// NOTE(review): the "if" guarding the stored-once optimizations below
// (original line 2001) was lost in extraction.
2002  // If the initial value for the global was an undef value, and if only
2003  // one other value was stored into it, we can just change the
2004  // initializer to be the stored value, then delete all stores to the
2005  // global. This allows us to mark it constant.
2006  if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
2007  if (isa<UndefValue>(GV->getInitializer())) {
2008  // Change the initial value here.
2009  GV->setInitializer(SOVConstant);
2010 
2011  // Clean up any obviously simplifiable users now.
2012  CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
2013 
2014  if (GV->use_empty()) {
2015  LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
2016  << "simplify all users and delete global!\n");
2017  GV->eraseFromParent();
2018  ++NumDeleted;
2019  }
2020  ++NumSubstitute;
2021  return true;
2022  }
2023 
2024  // Try to optimize globals based on the knowledge that only one value
2025  // (besides its initializer) is ever stored to the global.
2026  if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI))
2027  return true;
2028 
2029  // Otherwise, if the global was not a boolean, we can shrink it to be a
2030  // boolean.
2031  if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
2032  if (GS.Ordering == AtomicOrdering::NotAtomic) {
2033  if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
2034  ++NumShrunkToBool;
2035  return true;
2036  }
2037  }
2038  }
2039  }
2040 
2041  return false;
2042 }
2043 
2044 /// Analyze the specified global variable and optimize it if possible. If we
2045 /// make a change, return true.
2046 static bool
// NOTE(review): original line 2047 with the function name and first parameters
// (upstream: "processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI,") was
// lost in extraction.
2048  function_ref<DominatorTree &(Function &)> LookupDomTree) {
// Globals in the "llvm." namespace are intrinsic/metadata globals that this
// pass must not touch.
2049  if (GV.getName().startswith("llvm."))
2050  return false;
2051 
2052  GlobalStatus GS;
2053 
2054  if (GlobalStatus::analyzeGlobal(&GV, GS))
2055  return false;
2056 
2057  bool Changed = false;
// If the address is never compared, the global can be given an unnamed_addr
// flavor appropriate to its linkage.
2058  if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
2059  auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
// NOTE(review): original line 2060 (the ":" arm of this conditional, upstream
// ": GlobalValue::UnnamedAddr::Local;") was lost in extraction.
2061  if (NewUnnamedAddr != GV.getUnnamedAddr()) {
2062  GV.setUnnamedAddr(NewUnnamedAddr);
2063  NumUnnamed++;
2064  Changed = true;
2065  }
2066  }
2067 
2068  // Do more involved optimizations if the global is internal.
2069  if (!GV.hasLocalLinkage())
2070  return Changed;
2071 
2072  auto *GVar = dyn_cast<GlobalVariable>(&GV);
2073  if (!GVar)
2074  return Changed;
2075 
2076  if (GVar->isConstant() || !GVar->hasInitializer())
2077  return Changed;
2078 
2079  return processInternalGlobal(GVar, GS, TLI, LookupDomTree) || Changed;
2080 }
2081 
2082 /// Walk all of the direct calls of the specified function, changing them to
2083 /// FastCC.
// NOTE(review): original line 2084, the signature (upstream: "static void
// ChangeCalleesToFastCall(Function *F) {"), was lost in extraction.
2085  for (User *U : F->users()) {
// Block addresses reference the function but are not calls; skip them.
2086  if (isa<BlockAddress>(U))
2087  continue;
2088  CallSite CS(cast<Instruction>(U));
// NOTE(review): original line 2089 (upstream:
// "CS.setCallingConv(CallingConv::Fast);") was lost in extraction.
2090  }
2091 }
2092 
// NOTE(review): original line 2093, the signature (upstream: "static
// AttributeList StripNest(LLVMContext &C, AttributeList Attrs) {"), was lost
// in extraction. Returns Attrs with any 'nest' attribute removed.
2094  // There can be at most one attribute set with a nest attribute.
2095  unsigned NestIndex;
2096  if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex))
2097  return Attrs.removeAttribute(C, NestIndex, Attribute::Nest);
2098  return Attrs;
2099 }
2100 
// NOTE(review): original lines 2101-2102 (the signature, upstream "static void
// RemoveNestAttribute(Function *F) {", and the line stripping 'nest' from the
// function's own attribute list) were lost in extraction.
2103  for (User *U : F->users()) {
// Block addresses are not call sites; skip them.
2104  if (isa<BlockAddress>(U))
2105  continue;
2106  CallSite CS(cast<Instruction>(U));
// NOTE(review): original line 2107 (upstream: "CS.setAttributes(StripNest(
// F->getContext(), CS.getAttributes()));") was lost in extraction.
2108  }
2109 }
2110 
2111 /// Return true if this is a calling convention that we'd like to change. The
2112 /// idea here is that we don't want to mess with the convention if the user
2113 /// explicitly requested something with performance implications like coldcc,
2114 /// GHC, or anyregcc.
2115 static bool hasChangeableCC(Function *F) {
2116  CallingConv::ID CC = F->getCallingConv();
2117 
2118  // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
2119  if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
2120  return false;
2121 
2122  // Don't break the invariant that the inalloca parameter is the only parameter
2123  // passed in memory.
2124  // FIXME: GlobalOpt should remove inalloca when possible and hoist the dynamic
2125  // alloca it uses to the entry block if possible.
2126  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
2127  return false;
2128 
2129  // FIXME: Change CC for the whole chain of musttail calls when possible.
2130  //
2131  // Can't change CC of the function that either has musttail calls, or is a
2132  // musttail callee itself
2133  for (User *U : F->users()) {
2134  if (isa<BlockAddress>(U))
2135  continue;
2136  CallInst* CI = dyn_cast<CallInst>(U);
2137  if (!CI)
2138  continue;
2139 
2140  if (CI->isMustTailCall())
2141  return false;
2142  }
2143 
2144  for (BasicBlock &BB : *F)
2145  if (BB.getTerminatingMustTailCall())
2146  return false;
2147 
2148  return true;
2149 }
2150 
2151 /// Return true if the block containing the call site has a BlockFrequency of
2152 /// less than ColdCCRelFreq% of the entry block.
2153 static bool isColdCallSite(CallSite CS, BlockFrequencyInfo &CallerBFI) {
2154  const BranchProbability ColdProb(ColdCCRelFreq, 100);
2155  auto CallSiteBB = CS.getInstruction()->getParent();
2156  auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
2157  auto CallerEntryFreq =
2158  CallerBFI.getBlockFreq(&(CS.getCaller()->getEntryBlock()));
2159  return CallSiteFreq < CallerEntryFreq * ColdProb;
2160 }
2161 
2162 // This function checks if the input function F is cold at all call sites. It
2163 // also looks each call site's containing function, returning false if the
2164 // caller function contains other non cold calls. The input vector AllCallsCold
2165 // contains a list of functions that only have call sites in cold blocks.
2166 static bool
// NOTE(review): original lines 2167-2168 with the function name and first
// parameters (upstream: "isValidCandidateForColdCC(Function &F,
// function_ref<BlockFrequencyInfo &(Function &)> GetBFI,") were lost in
// extraction.
2169  const std::vector<Function *> &AllCallsCold) {
2170 
2171  if (F.user_empty())
2172  return false;
2173 
2174  for (User *U : F.users()) {
2175  if (isa<BlockAddress>(U))
2176  continue;
2177 
2178  CallSite CS(cast<Instruction>(U));
2179  Function *CallerFunc = CS.getInstruction()->getParent()->getParent();
2180  BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
2181  if (!isColdCallSite(CS, CallerBFI))
2182  return false;
// The caller itself must be known to make only cold calls.
2183  auto It = std::find(AllCallsCold.begin(), AllCallsCold.end(), CallerFunc);
2184  if (It == AllCallsCold.end())
2185  return false;
2186  }
2187  return true;
2188 }
2189 
// NOTE(review): original line 2190, the signature (upstream: "static void
// changeCallSitesToColdCC(Function *F) {"), was lost in extraction. Walks all
// direct call sites of F.
2191  for (User *U : F->users()) {
// Block addresses are not call sites; skip them.
2192  if (isa<BlockAddress>(U))
2193  continue;
2194  CallSite CS(cast<Instruction>(U));
// NOTE(review): original line 2195 (upstream:
// "CS.setCallingConv(CallingConv::Cold);") was lost in extraction.
2196  }
2197 }
2198 
2199 // This function iterates over all the call instructions in the input Function
2200 // and checks that all call sites are in cold blocks and are allowed to use the
2201 // coldcc calling convention.
2202 static bool
// NOTE(review): original line 2203 with the function name and first parameter
// (upstream: "hasOnlyColdCalls(Function &F,") was lost in extraction.
2204  function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
2205  for (BasicBlock &BB : F) {
2206  for (Instruction &I : BB) {
2207  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2208  CallSite CS(cast<Instruction>(CI));
2209  // Skip over inline asm instructions since they aren't function calls.
2210  if (CI->isInlineAsm())
2211  continue;
// Indirect calls and callees visible outside the module disqualify F.
2212  Function *CalledFn = CI->getCalledFunction();
2213  if (!CalledFn)
2214  return false;
2215  if (!CalledFn->hasLocalLinkage())
2216  return false;
2217  // Skip over intrinsics since they won't remain as function calls.
2218  if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
2219  continue;
2220  // Check if it's valid to use coldcc calling convention.
2221  if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
2222  CalledFn->hasAddressTaken())
2223  return false;
2224  BlockFrequencyInfo &CallerBFI = GetBFI(F);
2225  if (!isColdCallSite(CS, CallerBFI))
2226  return false;
2227  }
2228  }
2229  }
2230  return true;
2231 }
2232 
2233 static bool
// NOTE(review): original lines 2234-2236 with the function name and first
// parameters (upstream: "OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,"
// plus the GetTTI and GetBFI function_ref parameters) were lost in extraction.
2237  function_ref<DominatorTree &(Function &)> LookupDomTree,
2238  SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2239 
2240  bool Changed = false;
2241 
// Pre-compute the set of functions whose every call site is cold; consulted
// below when deciding whether a callee may switch to coldcc.
2242  std::vector<Function *> AllCallsCold;
2243  for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) {
2244  Function *F = &*FI++;
2245  if (hasOnlyColdCalls(*F, GetBFI))
2246  AllCallsCold.push_back(F);
2247  }
2248 
2249  // Optimize functions.
2250  for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
2251  Function *F = &*FI++;
2252 
2253  // Don't perform global opt pass on naked functions; we don't want fast
2254  // calling conventions for naked functions.
2255  if (F->hasFnAttribute(Attribute::Naked))
2256  continue;
2257 
2258  // Functions without names cannot be referenced outside this module.
2259  if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
// NOTE(review): original line 2260 (upstream:
// "F->setLinkage(GlobalValue::InternalLinkage);") was lost in extraction.
2261 
2262  if (deleteIfDead(*F, NotDiscardableComdats)) {
2263  Changed = true;
2264  continue;
2265  }
2266 
2267  // LLVM's definition of dominance allows instructions that are cyclic
2268  // in unreachable blocks, e.g.:
2269  // %pat = select i1 %condition, @global, i16* %pat
2270  // because any instruction dominates an instruction in a block that's
2271  // not reachable from entry.
2272  // So, remove unreachable blocks from the function, because a) there's
2273  // no point in analyzing them and b) GlobalOpt should otherwise grow
2274  // some more complicated logic to break these cycles.
2275  // Removing unreachable blocks might invalidate the dominator so we
2276  // recalculate it.
2277  if (!F->isDeclaration()) {
2278  if (removeUnreachableBlocks(*F)) {
2279  auto &DT = LookupDomTree(*F);
2280  DT.recalculate(*F);
2281  Changed = true;
2282  }
2283  }
2284 
2285  Changed |= processGlobal(*F, TLI, LookupDomTree);
2286 
2287  if (!F->hasLocalLinkage())
2288  continue;
2289 
2290  if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
2291  NumInternalFunc++;
2292  TargetTransformInfo &TTI = GetTTI(*F);
2293  // Change the calling convention to coldcc if either stress testing is
2294  // enabled or the target would like to use coldcc on functions which are
2295  // cold at all call sites and the callers contain no other non coldcc
2296  // calls.
2297  if (EnableColdCCStressTest ||
2298  (isValidCandidateForColdCC(*F, GetBFI, AllCallsCold) &&
2299  TTI.useColdCCForColdCall(*F))) {
// NOTE(review): original lines 2300-2301 (upstream:
// "F->setCallingConv(CallingConv::Cold);" and
// "changeCallSitesToColdCC(F);") were lost in extraction.
2302  Changed = true;
2303  NumColdCC++;
2304  }
2305  }
2306 
2307  if (hasChangeableCC(F) && !F->isVarArg() &&
2308  !F->hasAddressTaken()) {
2309  // If this function has a calling convention worth changing, is not a
2310  // varargs function, and is only called directly, promote it to use the
2311  // Fast calling convention.
// NOTE(review): original lines 2312-2313 (upstream:
// "F->setCallingConv(CallingConv::Fast);" and
// "ChangeCalleesToFastCall(F);") were lost in extraction.
2314  ++NumFastCallFns;
2315  Changed = true;
2316  }
2317 
2318  if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
2319  !F->hasAddressTaken()) {
2320  // The function is not used by a trampoline intrinsic, so it is safe
2321  // to remove the 'nest' attribute.
// NOTE(review): original line 2322 (upstream: "RemoveNestAttribute(F);") was
// lost in extraction.
2323  ++NumNestRemoved;
2324  Changed = true;
2325  }
2326  }
2327  return Changed;
2328 }
2329 
2330 static bool
// NOTE(review): original line 2331 with the function name and first parameters
// (upstream: "OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI,") was lost
// in extraction.
2332  function_ref<DominatorTree &(Function &)> LookupDomTree,
2333  SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2334  bool Changed = false;
2335 
2336  for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
2337  GVI != E; ) {
2338  GlobalVariable *GV = &*GVI++;
2339  // Global variables without names cannot be referenced outside this module.
2340  if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
// NOTE(review): original line 2341 (upstream:
// "GV->setLinkage(GlobalValue::InternalLinkage);") was lost in extraction.
2342  // Simplify the initializer.
2343  if (GV->hasInitializer())
2344  if (auto *C = dyn_cast<Constant>(GV->getInitializer())) {
2345  auto &DL = M.getDataLayout();
// Fold the initializer; only swap it in when folding actually changed it.
2346  Constant *New = ConstantFoldConstant(C, DL, TLI);
2347  if (New && New != C)
2348  GV->setInitializer(New);
2349  }
2350 
2351  if (deleteIfDead(*GV, NotDiscardableComdats)) {
2352  Changed = true;
2353  continue;
2354  }
2355 
2356  Changed |= processGlobal(*GV, TLI, LookupDomTree);
2357  }
2358  return Changed;
2359 }
2360 
2361 /// Evaluate a piece of a constantexpr store into a global initializer. This
2362 /// returns 'Init' modified to reflect 'Val' stored into it. At this point, the
2363 /// GEP operands of Addr [0, OpNo) have been stepped into.
// NOTE(review): original line 2364, the signature (upstream: "static Constant
// *EvaluateStoreInto(Constant *Init, Constant *Val,"), was lost in extraction.
2365  ConstantExpr *Addr, unsigned OpNo) {
2366  // Base case of the recursion.
2367  if (OpNo == Addr->getNumOperands()) {
2368  assert(Val->getType() == Init->getType() && "Type mismatch!");
2369  return Val;
2370  }
2371 
// NOTE(review): original line 2372 (the declaration of the Elts vector,
// upstream "SmallVector<Constant*, 32> Elts;") was lost in extraction.
2373  if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
2374  // Break up the constant into its elements.
2375  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
2376  Elts.push_back(Init->getAggregateElement(i));
2377 
2378  // Replace the element that we are supposed to.
2379  ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
2380  unsigned Idx = CU->getZExtValue();
2381  assert(Idx < STy->getNumElements() && "Struct index out of range!");
2382  Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
2383 
2384  // Return the modified struct.
2385  return ConstantStruct::get(STy, Elts);
2386  }
2387 
// Otherwise Init is an array or vector; recurse into the indexed element.
2388  ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
2389  SequentialType *InitTy = cast<SequentialType>(Init->getType());
2390  uint64_t NumElts = InitTy->getNumElements();
2391 
2392  // Break up the array into elements.
2393  for (uint64_t i = 0, e = NumElts; i != e; ++i)
2394  Elts.push_back(Init->getAggregateElement(i));
2395 
2396  assert(CI->getZExtValue() < NumElts);
2397  Elts[CI->getZExtValue()] =
2398  EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
2399 
2400  if (Init->getType()->isArrayTy())
2401  return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
2402  return ConstantVector::get(Elts);
2403 }
2404 
2405 /// We have decided that Addr (which satisfies the predicate
2406 /// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
2407 static void CommitValueTo(Constant *Val, Constant *Addr) {
2408  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
2409  assert(GV->hasInitializer());
2410  GV->setInitializer(Val);
2411  return;
2412  }
2413 
2414  ConstantExpr *CE = cast<ConstantExpr>(Addr);
2415  GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
2416  GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
2417 }
2418 
2419 /// Given a map of address -> value, where addresses are expected to be some form
2420 /// of either a global or a constant GEP, set the initializer for the address to
2421 /// be the value. This performs mostly the same function as CommitValueTo()
2422 /// and EvaluateStoreInto() but is optimized to be more efficient for the common
2423 /// case where the set of addresses are GEPs sharing the same underlying global,
2424 /// processing the GEPs in batches rather than individually.
2425 ///
2426 /// To give an example, consider the following C++ code adapted from the clang
2427 /// regression tests:
2428 /// struct S {
2429 /// int n = 10;
2430 /// int m = 2 * n;
2431 /// S(int a) : n(a) {}
2432 /// };
2433 ///
2434 /// template<typename T>
2435 /// struct U {
2436 /// T *r = &q;
2437 /// T q = 42;
2438 /// U *p = this;
2439 /// };
2440 ///
2441 /// U<S> e;
2442 ///
2443 /// The global static constructor for 'e' will need to initialize 'r' and 'p' of
2444 /// the outer struct, while also initializing the inner 'q' structs 'n' and 'm'
2445 /// members. This batch algorithm will simply use general CommitValueTo() method
2446 /// to handle the complex nested S struct initialization of 'q', before
2447 /// processing the outermost members in a single batch. Using CommitValueTo() to
2448 /// handle member in the outer struct is inefficient when the struct/array is
2449 /// very large as we end up creating and destroy constant arrays for each
2450 /// initialization.
2451 /// For the above case, we expect the following IR to be generated:
2452 ///
2453 /// %struct.U = type { %struct.S*, %struct.S, %struct.U* }
2454 /// %struct.S = type { i32, i32 }
2455 /// @e = global %struct.U { %struct.S* gep inbounds (%struct.U, %struct.U* @e,
2456 /// i64 0, i32 1),
2457 /// %struct.S { i32 42, i32 84 }, %struct.U* @e }
2458 /// The %struct.S { i32 42, i32 84 } inner initializer is treated as a complex
2459 /// constant expression, while the other two elements of @e are "simple".
// NOTE(review): original lines 2460-2463 (the signature, upstream "static void
// BatchCommitValueTo(const DenseMap<Constant*, Constant*> &Mem) {", and the
// declarations of the GVs, ComplexCEs and SimpleCEs vectors) were lost in
// extraction.
2464  SimpleCEs.reserve(Mem.size());
2465 
// Triage each store target: whole globals, deeply-nested GEPs ("complex"),
// and single-level GEPs ("simple", handled by the batch path below).
2466  for (const auto &I : Mem) {
2467  if (auto *GV = dyn_cast<GlobalVariable>(I.first)) {
2468  GVs.push_back(std::make_pair(GV, I.second));
2469  } else {
2470  ConstantExpr *GEP = cast<ConstantExpr>(I.first);
2471  // We don't handle the deeply recursive case using the batch method.
2472  if (GEP->getNumOperands() > 3)
2473  ComplexCEs.push_back(std::make_pair(GEP, I.second));
2474  else
2475  SimpleCEs.push_back(std::make_pair(GEP, I.second));
2476  }
2477  }
2478 
2479  // The algorithm below doesn't handle cases like nested structs, so use the
2480  // slower fully general method if we have to.
2481  for (auto ComplexCE : ComplexCEs)
2482  CommitValueTo(ComplexCE.second, ComplexCE.first);
2483 
2484  for (auto GVPair : GVs) {
2485  assert(GVPair.first->hasInitializer());
2486  GVPair.first->setInitializer(GVPair.second);
2487  }
2488 
2489  if (SimpleCEs.empty())
2490  return;
2491 
2492  // We cache a single global's initializer elements in the case where the
2493  // subsequent address/val pair uses the same one. This avoids throwing away and
2494  // rebuilding the constant struct/vector/array just because one element is
2495  // modified at a time.
// NOTE(review): original line 2496 (the declaration of the Elts vector,
// upstream "SmallVector<Constant *, 32> Elts;") was lost in extraction.
2497  Elts.reserve(SimpleCEs.size());
2498  GlobalVariable *CurrentGV = nullptr;
2499 
// When Update is set: flush the cached Elts back into CurrentGV's
// initializer and, if GV differs, re-seed the cache from GV's initializer.
2500  auto commitAndSetupCache = [&](GlobalVariable *GV, bool Update) {
2501  Constant *Init = GV->getInitializer();
2502  Type *Ty = Init->getType();
2503  if (Update) {
2504  if (CurrentGV) {
2505  assert(CurrentGV && "Expected a GV to commit to!");
2506  Type *CurrentInitTy = CurrentGV->getInitializer()->getType();
2507  // We have a valid cache that needs to be committed.
2508  if (StructType *STy = dyn_cast<StructType>(CurrentInitTy))
2509  CurrentGV->setInitializer(ConstantStruct::get(STy, Elts));
2510  else if (ArrayType *ArrTy = dyn_cast<ArrayType>(CurrentInitTy))
2511  CurrentGV->setInitializer(ConstantArray::get(ArrTy, Elts));
2512  else
2513  CurrentGV->setInitializer(ConstantVector::get(Elts));
2514  }
2515  if (CurrentGV == GV)
2516  return;
2517  // Need to clear and set up cache for new initializer.
2518  CurrentGV = GV;
2519  Elts.clear();
2520  unsigned NumElts;
2521  if (auto *STy = dyn_cast<StructType>(Ty))
2522  NumElts = STy->getNumElements();
2523  else
2524  NumElts = cast<SequentialType>(Ty)->getNumElements();
2525  for (unsigned i = 0, e = NumElts; i != e; ++i)
2526  Elts.push_back(Init->getAggregateElement(i));
2527  }
2528  };
2529 
2530  for (auto CEPair : SimpleCEs) {
2531  ConstantExpr *GEP = CEPair.first;
2532  Constant *Val = CEPair.second;
2533 
2534  GlobalVariable *GV = cast<GlobalVariable>(GEP->getOperand(0));
2535  commitAndSetupCache(GV, GV != CurrentGV);
2536  ConstantInt *CI = cast<ConstantInt>(GEP->getOperand(2));
2537  Elts[CI->getZExtValue()] = Val;
2538  }
2539  // The last initializer in the list needs to be committed, others
2540  // will be committed on a new initializer being processed.
2541  commitAndSetupCache(CurrentGV, true);
2542 }
2543 
2544 /// Evaluate static constructors in the function, if we can. Return true if we
2545 /// can, false otherwise.
// NOTE(review): original line 2546, the signature (upstream: "static bool
// EvaluateStaticConstructor(Function *F, const DataLayout &DL,"), was lost in
// extraction.
2547  TargetLibraryInfo *TLI) {
2548  // Call the function.
2549  Evaluator Eval(DL, TLI);
2550  Constant *RetValDummy;
2551  bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
// NOTE(review): original line 2552 (the remaining EvaluateFunction argument
// list) was lost in extraction.
2553 
2554  if (EvalSuccess) {
2555  ++NumCtorsEvaluated;
2556 
2557  // We succeeded at evaluation: commit the result.
2558  LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
2559  << F->getName() << "' to "
2560  << Eval.getMutatedMemory().size() << " stores.\n");
// NOTE(review): original line 2561 (upstream:
// "BatchCommitValueTo(Eval.getMutatedMemory());") was lost in extraction.
2562  for (GlobalVariable *GV : Eval.getInvariants())
2563  GV->setConstant(true);
2564  }
2565 
2566  return EvalSuccess;
2567 }
2568 
2569 static int compareNames(Constant *const *A, Constant *const *B) {
2570  Value *AStripped = (*A)->stripPointerCastsNoFollowAliases();
2571  Value *BStripped = (*B)->stripPointerCastsNoFollowAliases();
2572  return AStripped->getName().compare(BStripped->getName());
2573 }
2574 
// NOTE(review): original lines 2575-2576, the signature (upstream: "static
// void setUsedInitializer(GlobalVariable &V, const SmallPtrSet<GlobalValue *,
// 8> &Init) {"), were lost in extraction. Rebuilds an llvm.used-style array
// global from the set Init, replacing V.
2577  if (Init.empty()) {
2578  V.eraseFromParent();
2579  return;
2580  }
2581 
2582  // Type of pointer to the array of pointers.
2583  PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
2584 
2585  SmallVector<Constant *, 8> UsedArray;
2586  for (GlobalValue *GV : Init) {
2587  Constant *Cast
// NOTE(review): original line 2588 (upstream: "= ConstantExpr::
// getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);") was lost in extraction.
2589  UsedArray.push_back(Cast);
2590  }
2591  // Sort to get deterministic order.
2592  array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
2593  ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
2594 
// Replace the old global with a fresh appending-linkage array that carries
// the same name and the "llvm.metadata" section.
2595  Module *M = V.getParent();
2596  V.removeFromParent();
2597  GlobalVariable *NV =
2598  new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
2599  ConstantArray::get(ATy, UsedArray), "");
2600  NV->takeName(&V);
2601  NV->setSection("llvm.metadata");
2602  delete &V;
2603 }
2604 
2605 namespace {
2606 
2607 /// An easy to access representation of llvm.used and llvm.compiler.used.
// Caches the members of both special arrays as sets so that the alias
// optimization below can query/erase/insert in O(1), then writes the sets
// back with syncVariablesAndSets().
2608 class LLVMUsed {
// NOTE(review): the declaration of the `Used` member (line 2609) is missing
// from this extract; presumably SmallPtrSet<GlobalValue *, 8> Used; — it is
// referenced throughout the class. Confirm against the full source.
2610  SmallPtrSet<GlobalValue *, 8> CompilerUsed;
// The underlying global variables (may be null if absent from the module);
// kept so the sets can be committed back on sync.
2611  GlobalVariable *UsedV;
2612  GlobalVariable *CompilerUsedV;
2613 
2614 public:
2615  LLVMUsed(Module &M) {
2616  UsedV = collectUsedGlobalVariables(M, Used, false);
2617  CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
2618  }
2619 
2620  using iterator = SmallPtrSet<GlobalValue *, 8>::iterator;
2621  using used_iterator_range = iterator_range<iterator>;
2622 
2623  iterator usedBegin() { return Used.begin(); }
2624  iterator usedEnd() { return Used.end(); }
2625 
2626  used_iterator_range used() {
2627  return used_iterator_range(usedBegin(), usedEnd());
2628  }
2629 
2630  iterator compilerUsedBegin() { return CompilerUsed.begin(); }
2631  iterator compilerUsedEnd() { return CompilerUsed.end(); }
2632 
2633  used_iterator_range compilerUsed() {
2634  return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
2635  }
2636 
// Membership queries against the cached sets.
2637  bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
2638 
2639  bool compilerUsedCount(GlobalValue *GV) const {
2640  return CompilerUsed.count(GV);
2641  }
2642 
// Mutators operate only on the cached sets; the IR arrays are untouched
// until syncVariablesAndSets() is called.
2643  bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
2644  bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
2645  bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
2646 
2647  bool compilerUsedInsert(GlobalValue *GV) {
2648  return CompilerUsed.insert(GV).second;
2649  }
2650 
// Write the (possibly modified) sets back into the IR arrays.
2651  void syncVariablesAndSets() {
2652  if (UsedV)
2653  setUsedInitializer(*UsedV, Used);
2654  if (CompilerUsedV)
2655  setUsedInitializer(*CompilerUsedV, CompilerUsed);
2656  }
2657 };
2658 
2659 } // end anonymous namespace
2660 
2661 static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
2662  if (GA.use_empty()) // No use at all.
2663  return false;
2664 
2665  assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
2666  "We should have removed the duplicated "
2667  "element from llvm.compiler.used");
2668  if (!GA.hasOneUse())
2669  // Strictly more than one use. So at least one is not in llvm.used and
2670  // llvm.compiler.used.
2671  return true;
2672 
2673  // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
2674  return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
2675 }
2676 
// Returns true if V has two or more uses that are not accounted for by its
// llvm.used / llvm.compiler.used entry.
// NOTE(review): the first half of the signature (line 2677) is missing from
// this extract; presumably
// hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V, — confirm.
2678  const LLVMUsed &U) {
// Threshold of uses that proves "more than one other use": start at 2 and
// allow one extra use when one of them is the used-list reference.
2679  unsigned N = 2;
2680  assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
2681  "We should have removed the duplicated "
2682  "element from llvm.compiler.used");
2683  if (U.usedCount(&V) || U.compilerUsedCount(&V))
2684  ++N;
2685  return V.hasNUsesOrMore(N);
2686 }
2687 
2688 static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
2689  if (!GA.hasLocalLinkage())
2690  return true;
2691 
2692  return U.usedCount(&GA) || U.compilerUsedCount(&GA);
2693 }
2694 
2695 static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
2696  bool &RenameTarget) {
2697  RenameTarget = false;
2698  bool Ret = false;
2699  if (hasUseOtherThanLLVMUsed(GA, U))
2700  Ret = true;
2701 
2702  // If the alias is externally visible, we may still be able to simplify it.
2703  if (!mayHaveOtherReferences(GA, U))
2704  return Ret;
2705 
2706  // If the aliasee has internal linkage, give it the name and linkage
2707  // of the alias, and delete the alias. This turns:
2708  // define internal ... @f(...)
2709  // @a = alias ... @f
2710  // into:
2711  // define ... @a(...)
2712  Constant *Aliasee = GA.getAliasee();
2713  GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
2714  if (!Target->hasLocalLinkage())
2715  return Ret;
2716 
2717  // Do not perform the transform if multiple aliases potentially target the
2718  // aliasee. This check also ensures that it is safe to replace the section
2719  // and other attributes of the aliasee with those of the alias.
2720  if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
2721  return Ret;
2722 
2723  RenameTarget = true;
2724  return true;
2725 }
2726 
// Resolve aliases to their aliasees where safe, deleting or renaming as
// appropriate. Returns true if the module changed.
2727 static bool
// NOTE(review): the first line of the parameter list (2728) is missing from
// this extract; presumably OptimizeGlobalAliases(Module &M, — confirm.
2729  SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2730  bool Changed = false;
2731  LLVMUsed Used(M);
2732 
// Canonicalize: anything in llvm.used need not also be in
// llvm.compiler.used (the asserts in the helpers above rely on this).
2733  for (GlobalValue *GV : Used.used())
2734  Used.compilerUsedErase(GV);
2735 
// Manual iterator loop because the current alias may be erased below.
2736  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
2737  I != E;) {
2738  GlobalAlias *J = &*I++;
2739 
2740  // Aliases without names cannot be referenced outside this module.
2741  if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
// NOTE(review): the statement body of this `if` (line 2742) is missing from
// this extract; presumably it internalizes the alias — confirm.
2743 
2744  if (deleteIfDead(*J, NotDiscardableComdats)) {
2745  Changed = true;
2746  continue;
2747  }
2748 
2749  // If the alias can change at link time, nothing can be done - bail out.
2750  if (J->isInterposable())
2751  continue;
2752 
2753  Constant *Aliasee = J->getAliasee();
// NOTE(review): the definition of `Target` (line 2754) is missing from this
// extract; presumably dyn_cast<GlobalValue>(Aliasee->stripPointerCasts())
// — confirm.
2755  // We can't trivially replace the alias with the aliasee if the aliasee is
2756  // non-trivial in some way.
2757  // TODO: Try to handle non-zero GEPs of local aliasees.
2758  if (!Target)
2759  continue;
2760  Target->removeDeadConstantUsers();
2761 
2762  // Make all users of the alias use the aliasee instead.
2763  bool RenameTarget;
2764  if (!hasUsesToReplace(*J, Used, RenameTarget))
2765  continue;
2766 
// NOTE(review): line 2767 is missing from this extract; presumably the
// replaceAllUsesWith call redirecting the alias's uses to the aliasee.
2768  ++NumAliasesResolved;
2769  Changed = true;
2770 
2771  if (RenameTarget) {
2772  // Give the aliasee the name, linkage and other attributes of the alias.
2773  Target->takeName(&*J);
2774  Target->setLinkage(J->getLinkage());
2775  Target->setDSOLocal(J->isDSOLocal());
2776  Target->setVisibility(J->getVisibility());
2777  Target->setDLLStorageClass(J->getDLLStorageClass());
2778 
// Keep the used lists consistent: the renamed target inherits the alias's
// membership in llvm.used / llvm.compiler.used.
2779  if (Used.usedErase(&*J))
2780  Used.usedInsert(Target);
2781 
2782  if (Used.compilerUsedErase(&*J))
2783  Used.compilerUsedInsert(Target);
2784  } else if (mayHaveOtherReferences(*J, Used))
2785  continue;
2786 
2787  // Delete the alias.
2788  M.getAliasList().erase(J);
2789  ++NumAliasesRemoved;
2790  Changed = true;
2791  }
2792 
// Commit the (possibly modified) used sets back into the IR.
2793  Used.syncVariablesAndSets();
2794 
2795  return Changed;
2796 }
2797 
// Look up __cxa_atexit in the module, returning it only if the
// TargetLibraryInfo recognizes it with the correct prototype; nullptr
// otherwise.
// NOTE(review): the signature line (2798) is missing from this extract;
// presumably static Function *FindCXAAtExit(Module &M,
// TargetLibraryInfo *TLI) { — confirm.
2799  LibFunc F = LibFunc_cxa_atexit;
2800  if (!TLI->has(F))
2801  return nullptr;
2802 
2803  Function *Fn = M.getFunction(TLI->getName(F));
2804  if (!Fn)
2805  return nullptr;
2806 
2807  // Make sure that the function has the correct prototype.
// getLibFunc re-checks the signature and may reassign F, hence the second
// comparison against LibFunc_cxa_atexit.
2808  if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
2809  return nullptr;
2810 
2811  return Fn;
2812 }
2813 
2814 /// Returns whether the given function is an empty C++ destructor and can
2815 /// therefore be eliminated.
2816 /// Note that we assume that other optimization passes have already simplified
2817 /// the code so we simply check for 'ret'.
2818 static bool cxxDtorIsEmpty(const Function &Fn) {
2819  // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
2820  // nounwind, but that doesn't seem worth doing.
2821  if (Fn.isDeclaration())
2822  return false;
2823 
2824  for (auto &I : Fn.getEntryBlock()) {
2825  if (isa<DbgInfoIntrinsic>(I))
2826  continue;
2827  if (isa<ReturnInst>(I))
2828  return true;
2829  break;
2830  }
2831  return false;
2832 }
2833 
// Remove __cxa_atexit registrations of trivially-empty destructors.
// Returns true if any call was removed.
2834 static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
2835  /// Itanium C++ ABI p3.3.5:
2836  ///
2837  /// After constructing a global (or local static) object, that will require
2838  /// destruction on exit, a termination function is registered as follows:
2839  ///
2840  /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
2841  ///
2842  /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
2843  /// call f(p) when DSO d is unloaded, before all such termination calls
2844  /// registered before this one. It returns zero if registration is
2845  /// successful, nonzero on failure.
2846 
2847  // This pass will look for calls to __cxa_atexit where the function is trivial
2848  // and remove them.
2849  bool Changed = false;
2850 
// Iterate over the users of __cxa_atexit, advancing the iterator before any
// erasure so removal of the current call is safe.
2851  for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
2852  I != E;) {
2853  // We're only interested in calls. Theoretically, we could handle invoke
2854  // instructions as well, but neither llvm-gcc nor clang generate invokes
2855  // to __cxa_atexit.
2856  CallInst *CI = dyn_cast<CallInst>(*I++);
2857  if (!CI)
2858  continue;
2859 
2860  Function *DtorFn =
// NOTE(review): the initializer of DtorFn (line 2861) is missing from this
// extract; presumably dyn_cast<Function> of the call's first argument with
// pointer casts stripped — confirm.
2862  if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
2863  continue;
2864 
2865  // Just remove the call.
// NOTE(review): line 2866 is missing from this extract; presumably it
// replaces the call's uses with a constant return value before erasure.
2867  CI->eraseFromParent();
2868 
2869  ++NumCXXDtorsRemoved;
2870 
2871  Changed |= true;
2872  }
2873 
2874  return Changed;
2875 }
2876 
// Top-level driver shared by the new-PM and legacy passes: repeatedly runs
// all global optimizations until a fixed point is reached. Returns true if
// the module changed.
// NOTE(review): the signature line (2877) and the GetTTI/GetBFI function_ref
// parameters (lines 2879-2880) are missing from this extract — confirm
// against the full source.
2878  Module &M, const DataLayout &DL, TargetLibraryInfo *TLI,
2881  function_ref<DominatorTree &(Function &)> LookupDomTree) {
2882  SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
2883  bool Changed = false;
2884  bool LocalChange = true;
// Iterate to a fixed point: one optimization can expose opportunities for
// the others (e.g. a resolved alias makes a global non-address-taken).
2885  while (LocalChange) {
2886  LocalChange = false;
2887 
// Recompute the set of comdats that must be kept: a comdat is not
// discardable if any of its members is still live.
2888  NotDiscardableComdats.clear();
2889  for (const GlobalVariable &GV : M.globals())
2890  if (const Comdat *C = GV.getComdat())
2891  if (!GV.isDiscardableIfUnused() || !GV.use_empty())
2892  NotDiscardableComdats.insert(C);
2893  for (Function &F : M)
2894  if (const Comdat *C = F.getComdat())
2895  if (!F.isDefTriviallyDead())
2896  NotDiscardableComdats.insert(C);
2897  for (GlobalAlias &GA : M.aliases())
2898  if (const Comdat *C = GA.getComdat())
2899  if (!GA.isDiscardableIfUnused() || !GA.use_empty())
2900  NotDiscardableComdats.insert(C);
2901 
2902  // Delete functions that are trivially dead, ccc -> fastcc
2903  LocalChange |= OptimizeFunctions(M, TLI, GetTTI, GetBFI, LookupDomTree,
2904  NotDiscardableComdats);
2905 
2906  // Optimize global_ctors list.
2907  LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
2908  return EvaluateStaticConstructor(F, DL, TLI);
2909  });
2910 
2911  // Optimize non-address-taken globals.
2912  LocalChange |= OptimizeGlobalVars(M, TLI, LookupDomTree,
2913  NotDiscardableComdats);
2914 
2915  // Resolve aliases, when possible.
2916  LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
2917 
2918  // Try to remove trivial global destructors if they are not removed
2919  // already.
2920  Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
2921  if (CXAAtExitFn)
2922  LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
2923 
2924  Changed |= LocalChange;
2925  }
2926 
2927  // TODO: Move all global ctors functions to the end of the module for code
2928  // layout.
2929 
2930  return Changed;
2931 }
2932 
// New pass manager entry point: gathers module/function analyses and
// delegates to optimizeGlobalsInModule.
// NOTE(review): the signature line (2933) is missing from this extract;
// presumably PreservedAnalyses GlobalOptPass::run(Module &M,
// ModuleAnalysisManager &AM) { — confirm.
2934  auto &DL = M.getDataLayout();
2935  auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
// Function-level analyses are fetched lazily through the module proxy,
// exposed to the driver as callbacks.
2936  auto &FAM =
2937  AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2938  auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{
2939  return FAM.getResult<DominatorTreeAnalysis>(F);
2940  };
2941  auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
2942  return FAM.getResult<TargetIRAnalysis>(F);
2943  };
2944 
2945  auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
2946  return FAM.getResult<BlockFrequencyAnalysis>(F);
2947  };
2948 
// Report preserved analyses conservatively: none if anything changed.
2949  if (!optimizeGlobalsInModule(M, DL, &TLI, GetTTI, GetBFI, LookupDomTree))
2950  return PreservedAnalyses::all();
2951  return PreservedAnalyses::none();
2952 }
2953 
2954 namespace {
2955 
// Legacy pass manager wrapper around optimizeGlobalsInModule.
2956 struct GlobalOptLegacyPass : public ModulePass {
2957  static char ID; // Pass identification, replacement for typeid
2958 
2959  GlobalOptLegacyPass() : ModulePass(ID) {
// NOTE(review): the constructor body line (2960) is missing from this
// extract; presumably it calls initializeGlobalOptLegacyPassPass on the
// global PassRegistry — confirm.
2961  }
2962 
2963  bool runOnModule(Module &M) override {
// Honor -opt-bisect / optnone machinery.
2964  if (skipModule(M))
2965  return false;
2966 
2967  auto &DL = M.getDataLayout();
2968  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
// Function-level analyses are resolved lazily through wrapper passes,
// exposed to the driver as callbacks (mirrors the new-PM run() above).
2969  auto LookupDomTree = [this](Function &F) -> DominatorTree & {
2970  return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
2971  };
2972  auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
2973  return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2974  };
2975 
2976  auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
2977  return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
2978  };
2979 
2980  return optimizeGlobalsInModule(M, DL, TLI, GetTTI, GetBFI, LookupDomTree);
2981  }
2982 
2983  void getAnalysisUsage(AnalysisUsage &AU) const override {
// NOTE(review): the addRequired<...> body lines (2984-2987) are missing from
// this extract; presumably they require the TLI/TTI/BFI/DomTree wrapper
// passes used in runOnModule — confirm.
2988  }
2989 };
2990 
2991 } // end anonymous namespace
2992 
// Legacy pass registration boilerplate.
2993 char GlobalOptLegacyPass::ID = 0;
2994 
2995 INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
2996  "Global Variable Optimizer", false, false)
// NOTE(review): the INITIALIZE_PASS_DEPENDENCY lines (2997-3000) are missing
// from this extract — confirm against the full source.
3001 INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
3002  "Global Variable Optimizer", false, false)
3003 
// Factory used by the legacy pass pipeline.
// NOTE(review): the signature line (3004) is missing from this extract;
// presumably ModulePass *llvm::createGlobalOptimizerPass() { — confirm.
3005  return new GlobalOptLegacyPass();
3006 }
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:238
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:176
uint64_t CallInst * C
Value * getValueOperand()
Definition: Instructions.h:409
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:67
unsigned getAlignment() const
Definition: GlobalObject.h:58
bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates or reallocates memory (eith...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:594
ThreadLocalMode getThreadLocalMode() const
Definition: GlobalValue.h:254
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, const DataLayout &DL, TargetLibraryInfo *TLI)
We just marked GV constant.
Definition: GlobalOpt.cpp:273
void initializeGlobalOptLegacyPassPass(PassRegistry &)
bool IsLoaded
True if the global is ever loaded.
Definition: GlobalStatus.h:35
static bool isLeakCheckerRoot(GlobalVariable *GV)
Is this global variable possibly used by a leak checker as a root? If so, we might not really want to...
Definition: GlobalOpt.cpp:110
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:172
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool hasLocalLinkage() const
Definition: GlobalValue.h:435
unsigned getOpcode() const
Return the opcode at the root of this constant expression.
Definition: Constants.h:1209
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Special purpose, only applies to global arrays.
Definition: GlobalValue.h:54
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * StoredOnceValue
If only one value (besides the initializer constant) is ever stored to this global, keep track of what value it is.
Definition: GlobalStatus.h:59
const Function * AccessingFunction
These start out null/false.
Definition: GlobalStatus.h:64
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
Definition: Constants.cpp:1611
DiagnosticInfoOptimizationBase::Argument NV
static BinaryOperator * CreateNot(Value *Op, const Twine &Name="", Instruction *InsertBefore=nullptr)
Atomic ordering constants.
bool hasPrivateLinkage() const
Definition: GlobalValue.h:434
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
iterator erase(iterator where)
Definition: ilist.h:265
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI)
Given a value that is stored to a global but never read, determine whether it's safe to remove the st...
Definition: GlobalOpt.cpp:158
Type * getElementType(unsigned N) const
Definition: DerivedTypes.h:345
Constant * ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE)
ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a getelementptr constantexpr, return the constant value being addressed by the constant expression, or null if something is funny and we can't decide.
void setAttributes(AttributeList PAL)
Set the parameter attributes of the call.
Definition: CallSite.h:341
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant *> IdxList, bool InBounds=false, Optional< unsigned > InRangeIndex=None, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1153
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:264
bool user_empty() const
Definition: Value.h:363
static Value * GetHeapSROAValue(Value *V, unsigned FieldNo, DenseMap< Value *, std::vector< Value *>> &InsertedScalarizedValues, std::vector< std::pair< PHINode *, unsigned >> &PHIsToRewrite)
Definition: GlobalOpt.cpp:1151
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, GlobalVariable *GV)
The Alloc pointer is stored into GV somewhere.
Definition: GlobalOpt.cpp:1010
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:256
This class represents zero extension of integer types.
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:344
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
Definition: DataLayout.cpp:607
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
bool useColdCCForColdCall(Function &F) const
Return true if the input function which is cold at all call sites, should use coldcc calling conventi...
LLVM_NODISCARD int compare(StringRef RHS) const
compare - Compare two strings; the result is -1, 0, or 1 if this string is lexicographically less tha...
Definition: StringRef.h:174
bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI=nullptr, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Remove all blocks that can not be reached from the function&#39;s entry.
Definition: Local.cpp:2204
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value *> IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:899
static bool isSafeSROAElementUse(Value *V)
Return true if the specified instruction is a safe user of a derived expression from a global that we...
Definition: GlobalOpt.cpp:396
static bool cxxDtorIsEmpty(const Function &Fn)
Returns whether the given function is an empty C++ destructor and can therefore be eliminated...
Definition: GlobalOpt.cpp:2818
static bool deleteIfDead(GlobalValue &GV, SmallPtrSetImpl< const Comdat *> &NotDiscardableComdats)
Definition: GlobalOpt.cpp:1750
This class represents a function call, abstracting a target machine&#39;s calling convention.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:629
gep_type_iterator gep_type_end(const User *GEP)
unsigned less or equal
Definition: InstrTypes.h:672
unsigned less than
Definition: InstrTypes.h:671
static bool OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI, function_ref< DominatorTree &(Function &)> LookupDomTree, SmallPtrSetImpl< const Comdat *> &NotDiscardableComdats)
Definition: GlobalOpt.cpp:2331
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:116
static bool hasChangeableCC(Function *F)
Return true if this is a calling convention that we&#39;d like to change.
Definition: GlobalOpt.cpp:2115
static Constant * EvaluateStoreInto(Constant *Init, Constant *Val, ConstantExpr *Addr, unsigned OpNo)
Evaluate a piece of a constantexpr store into a global initializer.
Definition: GlobalOpt.cpp:2364
StringRef getName(LibFunc F) const
Analysis pass providing the TargetTransformInfo.
static bool hasOnlyColdCalls(Function &F, function_ref< BlockFrequencyInfo &(Function &)> GetBFI)
Definition: GlobalOpt.cpp:2203
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:247
static GlobalVariable * OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, ConstantInt *NElements, const DataLayout &DL, TargetLibraryInfo *TLI)
This function takes the specified global variable, and transforms the program as if it always contain...
Definition: GlobalOpt.cpp:835
static bool isSafeSROAGEP(User *U)
Return true if the specified GEP is a safe user of a derived expression from a global that we want to...
Definition: GlobalOpt.cpp:363
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
void setCallingConv(CallingConv::ID CC)
Set the calling convention of the call.
Definition: CallSite.h:324
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
bool HasMultipleAccessingFunctions
Definition: GlobalStatus.h:65
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:709
bool isInterposable() const
Return true if this global's definition can be substituted with an arbitrary definition at link time...
Definition: GlobalValue.h:419
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:320
This class wraps the llvm.memset intrinsic.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1185
13: Structures
Definition: Type.h:72
STATISTIC(NumFunctions, "Total number of functions")
static cl::opt< bool > EnableColdCCStressTest("enable-coldcc-stress-test", cl::desc("Enable stress test of coldcc by adding " "calling conv to all internal functions."), cl::init(false), cl::Hidden)
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:534
An instruction for reading from memory.
Definition: Instructions.h:167
const GlobalListType & getGlobalList() const
Get the Module&#39;s list of global variables (constant).
Definition: Module.h:524
bool isOpaque() const
Return true if this is a type with an identity that has no body specified yet.
Definition: DerivedTypes.h:300
Hexagon Common GEP
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
static Instruction * CreateFree(Value *Source, Instruction *InsertBefore)
Generate the IR for a call to the builtin free function.
iv Induction Variable Users
Definition: IVUsers.cpp:51
static GlobalVariable * PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Value *NElems, const DataLayout &DL, const TargetLibraryInfo *TLI)
CI is an allocation of an array of structures.
Definition: GlobalOpt.cpp:1274
#define op(i)
15: Pointers
Definition: Type.h:74
This defines the Use class.
void reserve(size_type N)
Definition: SmallVector.h:369
bool isMustTailCall() const
bool EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl< Constant *> &ActualArgs)
Evaluate a call to function F, returning true if successful, false if we can't evaluate it...
Definition: Evaluator.cpp:645
void setAlignment(unsigned Align)
Definition: Globals.cpp:115
static bool processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, TargetLibraryInfo *TLI, function_ref< DominatorTree &(Function &)> LookupDomTree)
Analyze the specified global variable and optimize it if possible.
Definition: GlobalOpt.cpp:1912
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, SmallPtrSetImpl< const PHINode *> &LoadUsingPHIs, SmallPtrSetImpl< const PHINode *> &LoadUsingPHIsPerLoad)
Verify that all uses of V (a load, or a phi of a load) are simple enough to perform heap SRA on...
Definition: GlobalOpt.cpp:1054
static Constant * get(ArrayType *T, ArrayRef< Constant *> V)
Definition: Constants.cpp:992
This global is stored to, but the only thing stored is the constant it was initialized with...
Definition: GlobalStatus.h:44
static int compareNames(Constant *const *A, Constant *const *B)
Definition: GlobalOpt.cpp:2569
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:274
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
const AliasListType & getAliasList() const
Get the Module&#39;s list of aliases (constant).
Definition: Module.h:541
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1155
static DIExpression * prependOpcodes(const DIExpression *Expr, SmallVectorImpl< uint64_t > &Ops, bool StackValue=false)
Prepend DIExpr with the given opcodes and optionally turn it into a stack value.
unsigned getAllocaAddrSpace() const
Definition: DataLayout.h:269
AnalysisUsage & addRequired()
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:554
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U, bool &RenameTarget)
Definition: GlobalOpt.cpp:2695
static void makeAllConstantUsesInstructions(Constant *C)
C may have non-instruction users, and allNonInstructionUsersCanBeMadeInstructions has returned true...
Definition: GlobalOpt.cpp:1880
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: GlobalOpt.cpp:2933
static bool isColdCallSite(CallSite CS, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
Definition: GlobalOpt.cpp:2153
Type * getPointerElementType() const
Definition: Type.h:375
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV)
Return true if all uses of any loads from GV will trap if the loaded value is null.
Definition: GlobalOpt.cpp:661
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:353
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:161
void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
Definition: Globals.cpp:362
bool isDSOLocal() const
Definition: GlobalValue.h:279
static void changeCallSitesToColdCC(Function *F)
Definition: GlobalOpt.cpp:2190
Class to represent struct types.
Definition: DerivedTypes.h:232
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
ValTy * getCalledValue() const
Return the pointer to function that is being called.
Definition: CallSite.h:104
uint64_t getElementOffsetInBits(unsigned Idx) const
Definition: DataLayout.h:581
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
static Optional< unsigned > getOpcode(ArrayRef< VPValue *> Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:196
static bool optimizeGlobalsInModule(Module &M, const DataLayout &DL, TargetLibraryInfo *TLI, function_ref< TargetTransformInfo &(Function &)> GetTTI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI, function_ref< DominatorTree &(Function &)> LookupDomTree)
Definition: GlobalOpt.cpp:2877
UnnamedAddr getUnnamedAddr() const
Definition: GlobalValue.h:212
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1199
This file contains the simple types necessary to represent the attributes associated with functions a...
Legacy analysis pass which computes BlockFrequencyInfo.
StoredType
Keep track of what stores to the global look like.
Definition: GlobalStatus.h:38
static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U)
Definition: GlobalOpt.cpp:2688
uint64_t getNumElements() const
Definition: DerivedTypes.h:390
void setDLLStorageClass(DLLStorageClassTypes C)
Definition: GlobalValue.h:267
static bool OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, function_ref< TargetTransformInfo &(Function &)> GetTTI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI, function_ref< DominatorTree &(Function &)> LookupDomTree, SmallPtrSetImpl< const Comdat *> &NotDiscardableComdats)
Definition: GlobalOpt.cpp:2234
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal)
At this point, we have learned that the only two values ever stored into GV are its initializer and O...
Definition: GlobalOpt.cpp:1603
static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U)
Definition: GlobalOpt.cpp:2661
AtomicOrdering
Atomic ordering for LLVM&#39;s memory model.
INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt", "Global Variable Optimizer", false, false) INITIALIZE_PASS_END(GlobalOptLegacyPass
global_iterator global_begin()
Definition: Module.h:581
InstrTy * getInstruction() const
Definition: CallSite.h:96
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Attempt to fold the constant using the specified DataLayout.
static cl::opt< int > ColdCCRelFreq("coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a call site to be considered cold for enabling" "coldcc"))
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:84
User * getUser() const LLVM_READONLY
Returns the User that contains this Use.
Definition: Use.cpp:40
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:888
void setArgument(unsigned ArgNo, Value *newVal)
Definition: CallSite.h:198
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, AtomicOrdering Ordering, const DataLayout &DL, TargetLibraryInfo *TLI)
Definition: GlobalOpt.cpp:1565
static void ChangeCalleesToFastCall(Function *F)
Walk all of the direct calls of the specified function, changing them to FastCC.
Definition: GlobalOpt.cpp:2084
static Function * FindCXAAtExit(Module &M, TargetLibraryInfo *TLI)
Definition: GlobalOpt.cpp:2798
static void setUsedInitializer(GlobalVariable &V, const SmallPtrSetImpl< GlobalValue *> &Init)
Definition: GlobalOpt.cpp:2575
Class to represent array types.
Definition: DerivedTypes.h:400
bool has(LibFunc F) const
Tests whether a library function is available.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, const GlobalVariable *GV, SmallPtrSetImpl< const PHINode *> &PHIs)
Scan the use-list of V checking to make sure that there are no complex uses of V. ...
Definition: GlobalOpt.cpp:963
This class represents a no-op cast from one type to another.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:223
An instruction for storing to memory.
Definition: Instructions.h:320
LinkageTypes getLinkage() const
Definition: GlobalValue.h:450
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
Type::subtype_iterator element_iterator
Definition: DerivedTypes.h:332
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Value * getOperand(unsigned i) const
Definition: User.h:169
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:546
Class to represent pointers.
Definition: DerivedTypes.h:498
This global is stored to, but only its initializer and one other value is ever stored to it...
Definition: GlobalStatus.h:50
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:20
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:344
Value * getMallocArraySize(CallInst *CI, const DataLayout &DL, const TargetLibraryInfo *TLI, bool LookThroughSExt=false)
getMallocArraySize - Returns the array size of a malloc call.
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1782
const BasicBlock & getEntryBlock() const
Definition: Function.h:639
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:609
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:873
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:769
bool hasAttrSomewhere(Attribute::AttrKind Kind, unsigned *Index=nullptr) const
Return true if the specified attribute is set for at least one parameter or for the return value...
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
void setCallingConv(CallingConv::ID CC)
Definition: Function.h:216
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N users or more.
Definition: Value.cpp:135
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1082
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
As we analyze each global, keep track of some information about it.
Definition: GlobalStatus.h:29
Wrapper pass for TargetTransformInfo.
void setCalledFunction(Value *V)
Set the callee to the specified value.
Definition: CallSite.h:130
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:321
DLLStorageClassTypes getDLLStorageClass() const
Definition: GlobalValue.h:258
VisibilityTypes getVisibility() const
Definition: GlobalValue.h:232
alias_iterator alias_end()
Definition: Module.h:622
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction...
Definition: Instruction.cpp:73
bool hasName() const
Definition: Value.h:250
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
globalopt
Definition: GlobalOpt.cpp:3001
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
const CallInst * extractMallocCall(const Value *I, const TargetLibraryInfo *TLI)
extractMallocCall - Returns the corresponding CallInst if the instruction is a malloc call...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
AttributeList getAttributes() const
Get the parameter attributes of the call.
Definition: CallSite.h:337
void setExternallyInitialized(bool Val)
ValTy * getArgument(unsigned ArgNo) const
Definition: CallSite.h:193
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:575
void getDebugInfo(SmallVectorImpl< DIGlobalVariableExpression *> &GVs) const
Fill the vector with all debug info attachements.
Definition: Metadata.cpp:1524
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
void eraseFromParent()
eraseFromParent - This method unlinks &#39;this&#39; from the containing module and deletes it...
Definition: Globals.cpp:358
element_iterator element_end() const
Definition: DerivedTypes.h:335
static void RemoveNestAttribute(Function *F)
Definition: GlobalOpt.cpp:2101
A pair of DIGlobalVariable and DIExpression.
bool isUnordered() const
Definition: Instructions.h:278
Represent the analysis usage information of a pass.
This instruction compares its operands according to the predicate given to the constructor.
static bool isDiscardableIfUnused(LinkageTypes Linkage)
Whether the definition of this global may be discarded if it is not used in its compilation unit...
Definition: GlobalValue.h:360
static Constant * get(StructType *T, ArrayRef< Constant *> V)
Definition: Constants.cpp:1053
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:526
Base class for variables.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
self_iterator getIterator()
Definition: ilist_node.h:81
ModulePass * createGlobalOptimizerPass()
createGlobalOptimizerPass - This function returns a new pass that optimizes non-address taken interna...
Definition: GlobalOpt.cpp:3004
static bool isValidCandidateForColdCC(Function &F, function_ref< BlockFrequencyInfo &(Function &)> GetBFI, const std::vector< Function *> &AllCallsCold)
Definition: GlobalOpt.cpp:2167
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
static bool analyzeGlobal(const Value *V, GlobalStatus &GS)
Look at all uses of the global and fill in the GlobalStatus structure.
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:59
void setConstant(bool Val)
static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V, const LLVMUsed &U)
Definition: GlobalOpt.cpp:2677
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:192
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1424
const Constant * stripPointerCasts() const
Definition: Constant.h:177
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:529
bool isSafeToDestroyConstant(const Constant *C)
It is safe to destroy a constant iff it is only used by constants itself.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
size_t size() const
Definition: SmallVector.h:52
static wasm::ValType getType(const TargetRegisterClass *RC)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1206
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:219
static void ConstantPropUsersOf(Value *V, const DataLayout &DL, TargetLibraryInfo *TLI)
Walk the use list of V, constant folding all of the instructions that are foldable.
Definition: GlobalOpt.cpp:813
static void RewriteHeapSROALoadUser(Instruction *LoadUser, DenseMap< Value *, std::vector< Value *>> &InsertedScalarizedValues, std::vector< std::pair< PHINode *, unsigned >> &PHIsToRewrite)
Given a load instruction and a value derived from the load, rewrite the derived value to use the Heap...
Definition: GlobalOpt.cpp:1195
void removeFromParent()
removeFromParent - This method unlinks &#39;this&#39; from the containing module, but does not delete it...
Definition: Globals.cpp:354
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
signed greater than
Definition: InstrTypes.h:673
unsigned first
bool doesNotRecurse() const
Determine if the function is known not to recurse, directly or indirectly.
Definition: Function.h:555
This class evaluates LLVM IR, producing the Constant representing each SSA instruction.
Definition: Evaluator.h:38
global_iterator global_end()
Definition: Module.h:583
14: Arrays
Definition: Type.h:73
This is the superclass of the array and vector type classes.
Definition: DerivedTypes.h:374
static AttributeList StripNest(LLVMContext &C, AttributeList Attrs)
Definition: GlobalOpt.cpp:2093
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallPtrSetImpl< GlobalValue *> &Set, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:598
Analysis pass which computes BlockFrequencyInfo.
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:191
Global Variable Optimizer
Definition: GlobalOpt.cpp:3001
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static void CommitValueTo(Constant *Val, Constant *Addr)
We have decided that Addr (which satisfies the predicate isSimpleEnoughPointerToCommit) should get Va...
Definition: GlobalOpt.cpp:2407
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
16: SIMD &#39;packed&#39; format, or other vector type
Definition: Type.h:75
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
Module.h This file contains the declarations for the Module class.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:63
Provides information about what library functions are available for the current target.
unsigned arg_size() const
Definition: CallSite.h:226
X86_ThisCall - Similar to X86_StdCall.
Definition: CallingConv.h:110
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:749
signed less than
Definition: InstrTypes.h:675
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
uint64_t getSizeInBytes() const
Definition: DataLayout.h:562
alias_iterator alias_begin()
Definition: Module.h:620
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Definition: InstrTypes.h:1751
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:631
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
bool hasGlobalUnnamedAddr() const
Definition: GlobalValue.h:199
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:587
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1440
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
Definition: Function.h:226
DWARF expression.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:193
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:444
BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:174
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, TargetLibraryInfo *TLI)
Evaluate static constructors in the function, if we can.
Definition: GlobalOpt.cpp:2546
signed less or equal
Definition: InstrTypes.h:676
A range adaptor for a pair of iterators.
This file contains constants used for implementing Dwarf debug support.
const Comdat * getComdat() const
Definition: Globals.cpp:170
Target - Wrapper for Target specific information.
void push_back(pointer val)
Definition: ilist.h:311
Type * getMallocAllocatedType(const CallInst *CI, const TargetLibraryInfo *TLI)
getMallocAllocatedType - Returns the Type allocated by malloc call.
iterator_range< user_iterator > users()
Definition: Value.h:399
static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C)
C may have non-instruction users.
Definition: GlobalOpt.cpp:1854
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1539
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
element_iterator element_begin() const
Definition: DerivedTypes.h:334
static Instruction * CreateMalloc(Instruction *InsertBefore, Type *IntPtrTy, Type *AllocTy, Value *AllocSize, Value *ArraySize=nullptr, Function *MallocF=nullptr, const Twine &Name="")
Generate the IR for a call to malloc:
const SmallPtrSetImpl< GlobalVariable * > & getInvariants() const
Definition: Evaluator.h:92
static void BatchCommitValueTo(const DenseMap< Constant *, Constant *> &Mem)
Given a map of address -> value, where addresses are expected to be some form of either a global or a...
Definition: GlobalOpt.cpp:2460
const Comdat * getComdat() const
Definition: GlobalObject.h:100
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:461
use_iterator use_begin()
Definition: Value.h:338
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:721
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, Instruction *StoredVal)
If all users of values loaded from GV are simple enough to perform HeapSRA, return true...
Definition: GlobalOpt.cpp:1105
This class wraps the llvm.memcpy/memmove intrinsics.
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing module and deletes it.
Definition: Globals.cpp:84
static bool GlobalUsersSafeToSRA(GlobalValue *GV)
Look at all uses of the global and decide whether it is safe for us to perform this transformation...
Definition: GlobalOpt.cpp:417
iterator insert(iterator where, pointer New)
Definition: ilist.h:226
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:324
iterator end()
Definition: Module.h:600
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:215
iterator begin() const
Definition: SmallPtrSet.h:396
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:576
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
bool IsCompared
True if the global&#39;s address is used in a comparison.
Definition: GlobalStatus.h:31
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
AtomicOrdering Ordering
Set to the strongest atomic ordering requirement.
Definition: GlobalStatus.h:72
unsigned greater or equal
Definition: InstrTypes.h:670
static bool OptimizeGlobalAliases(Module &M, SmallPtrSetImpl< const Comdat *> &NotDiscardableComdats)
Definition: GlobalOpt.cpp:2728
const Value * stripPointerCastsNoFollowAliases() const
Strip off pointer casts and all-zero GEPs.
Definition: Value.cpp:533
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
void copyAttributesFrom(const GlobalVariable *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a GlobalVariable) fro...
Definition: Globals.cpp:385
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const
Remove the specified attribute at the specified index from this attribute list.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:259
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
user_iterator_impl< User > user_iterator
Definition: Value.h:368
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
iterator begin()
Definition: Module.h:598
static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn)
Definition: GlobalOpt.cpp:2834
static GlobalVariable * SRAGlobal(GlobalVariable *GV, const DataLayout &DL)
Perform scalar replacement of aggregates on the specified global variable.
Definition: GlobalOpt.cpp:460
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:580
const DenseMap< Constant *, Constant * > & getMutatedMemory() const
Definition: Evaluator.h:88
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
Type * getValueType() const
Definition: GlobalValue.h:275
uint32_t Size
Definition: Profile.cpp:46
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:407
static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, Type *AllocTy, AtomicOrdering Ordering, const DataLayout &DL, TargetLibraryInfo *TLI)
This function is called when we see a pointer global variable with a single value stored it that is a...
Definition: GlobalOpt.cpp:1467
iterator end() const
Definition: SmallPtrSet.h:401
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:205
Analysis pass providing the TargetLibraryInfo.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, const DataLayout &DL, TargetLibraryInfo *TLI)
The specified global has only one non-null value stored into it.
Definition: GlobalOpt.cpp:748
bool hasAddressTaken(const User **=nullptr) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition: Function.cpp:1257
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, uint64_t FragmentOffsetInBits, uint64_t FragmentSizeInBits, unsigned NumElements)
Copy over the debug info for a variable to its SRA replacements.
Definition: GlobalOpt.cpp:434
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:375
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:249
static bool processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI, function_ref< DominatorTree &(Function &)> LookupDomTree)
Analyze the specified global variable and optimize it if possible.
Definition: GlobalOpt.cpp:2047
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:349
LLVM Value Representation.
Definition: Value.h:72
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:444
static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV)
Definition: GlobalOpt.cpp:677
bool hasInitializer() const
Definitions have initializers, declarations don&#39;t.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:40
Invoke instruction.
void addDebugInfo(DIGlobalVariableExpression *GV)
Attach a DIGlobalVariableExpression.
Definition: Metadata.cpp:1520
uint64_t getTypeAllocSizeInBits(Type *Ty) const
Returns the offset in bits between successive objects of the specified type, including alignment padd...
Definition: DataLayout.h:471
Type * getElementType() const
Definition: DerivedTypes.h:391
iterator_range< global_iterator > globals()
Definition: Module.h:587
IRTranslator LLVM IR MI
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:412
unsigned greater than
Definition: InstrTypes.h:669
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
This pass exposes codegen information to IR-level passes.
static bool CleanupPointerRootUsers(GlobalVariable *GV, const TargetLibraryInfo *TLI)
This GV is a pointer root.
Definition: GlobalOpt.cpp:187
static bool isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV, function_ref< DominatorTree &(Function &)> LookupDomTree)
Definition: GlobalOpt.cpp:1775
bool optimizeGlobalCtorsList(Module &M, function_ref< bool(Function *)> ShouldRemove)
Call "ShouldRemove" for every entry in M&#39;s global_ctor list and remove the entries for which it retur...
Definition: CtorUtils.cpp:116
#define LLVM_DEBUG(X)
Definition: Debug.h:122
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap< Value *, std::vector< Value *>> &InsertedScalarizedValues, std::vector< std::pair< PHINode *, unsigned > > &PHIsToRewrite)
We are performing Heap SRoA on a global.
Definition: GlobalOpt.cpp:1258
bool isExternallyInitialized() const
void setSection(StringRef S)
Change the section for this global.
Definition: Globals.cpp:188
bool use_empty() const
Definition: Value.h:322
bool isDefTriviallyDead() const
isDefTriviallyDead - Return true if it is trivially safe to remove this function definition from the ...
Definition: Function.cpp:1277
static Constant * get(ArrayRef< Constant *> V)
Definition: Constants.cpp:1088
static bool AllUsesOfValueWillTrapIfNull(const Value *V, SmallPtrSetImpl< const PHINode *> &PHIs)
Return true if all users of the specified value will trap if the value is dynamically null...
Definition: GlobalOpt.cpp:612
void setDSOLocal(bool Local)
Definition: GlobalValue.h:277
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:273
signed greater or equal
Definition: InstrTypes.h:674
User * user_back()
Definition: Value.h:385
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:220
static Optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
const BasicBlock * getParent() const
Definition: Instruction.h:66
an instruction to allocate memory on the stack
Definition: Instructions.h:59
gep_type_iterator gep_type_begin(const User *GEP)
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1037
user_iterator user_end()
Definition: Value.h:383
const Constant * getAliasee() const
Definition: GlobalAlias.h:77
FunTy * getCaller() const
Return the caller function for this call site.
Definition: CallSite.h:275