LLVM 23.0.0git
GVN.cpp
Go to the documentation of this file.
1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global value numbering to eliminate fully redundant
10// instructions. It also performs simple dead load elimination.
11//
12// Note that this pass does the value numbering itself; it does not use the
13// ValueNumbering analysis passes.
14//
15//===----------------------------------------------------------------------===//
16
18#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/Hashing.h"
21#include "llvm/ADT/MapVector.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/Statistic.h"
31#include "llvm/Analysis/CFG.h"
36#include "llvm/Analysis/Loads.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DebugLoc.h"
51#include "llvm/IR/Dominators.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/LLVMContext.h"
58#include "llvm/IR/Metadata.h"
59#include "llvm/IR/Module.h"
60#include "llvm/IR/PassManager.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/Value.h"
66#include "llvm/Pass.h"
70#include "llvm/Support/Debug.h"
77#include <algorithm>
78#include <cassert>
79#include <cstdint>
80#include <optional>
81#include <utility>
82
83using namespace llvm;
84using namespace llvm::gvn;
85using namespace llvm::VNCoercion;
86using namespace PatternMatch;
87
88#define DEBUG_TYPE "gvn"
89
90STATISTIC(NumGVNInstr, "Number of instructions deleted");
91STATISTIC(NumGVNLoad, "Number of loads deleted");
92STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
93STATISTIC(NumGVNBlocks, "Number of blocks merged");
94STATISTIC(NumGVNSimpl, "Number of instructions simplified");
95STATISTIC(NumGVNEqProp, "Number of equalities propagated");
96STATISTIC(NumPRELoad, "Number of loads PRE'd");
97STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
98STATISTIC(NumPRELoadMoved2CEPred,
99 "Number of loads moved to predecessor of a critical edge in PRE");
100
101STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
102 "Number of blocks speculated as available in "
103 "IsValueFullyAvailableInBlock(), max");
104STATISTIC(MaxBBSpeculationCutoffReachedTimes,
105 "Number of times we we reached gvn-max-block-speculations cut-off "
106 "preventing further exploration");
107
108static cl::opt<bool> GVNEnableScalarPRE("enable-scalar-pre", cl::init(true),
109 cl::Hidden);
110static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true));
111static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
112 cl::init(true));
113static cl::opt<bool>
114GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
115 cl::init(false));
116static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
117static cl::opt<bool> GVNEnableMemorySSA("enable-gvn-memoryssa",
118 cl::init(false));
119
121 "gvn-scan-users-limit", cl::Hidden, cl::init(100),
122 cl::desc("The number of memory accesses to scan in a block in reaching "
123 "memory values analysis (default = 100)"));
124
126 "gvn-max-num-deps", cl::Hidden, cl::init(100),
127 cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
128
129// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
131 "gvn-max-block-speculations", cl::Hidden, cl::init(600),
132 cl::desc("Max number of blocks we're willing to speculate on (and recurse "
133 "into) when deducing if a value is fully available or not in GVN "
134 "(default = 600)"));
135
137 "gvn-max-num-visited-insts", cl::Hidden, cl::init(100),
138 cl::desc("Max number of visited instructions when trying to find "
139 "dominating value of select dependency (default = 100)"));
140
142 "gvn-max-num-insns", cl::Hidden, cl::init(100),
143 cl::desc("Max number of instructions to scan in each basic block in GVN "
144 "(default = 100)"));
145
148 bool Commutative = false;
149 // The type is not necessarily the result type of the expression, it may be
150 // any additional type needed to disambiguate the expression.
151 Type *Ty = nullptr;
153
154 AttributeList Attrs;
155
157
158 bool operator==(const Expression &Other) const {
159 if (Opcode != Other.Opcode)
160 return false;
161 if (Opcode == ~0U || Opcode == ~1U)
162 return true;
163 if (Ty != Other.Ty)
164 return false;
165 if (VarArgs != Other.VarArgs)
166 return false;
167 if ((!Attrs.isEmpty() || !Other.Attrs.isEmpty()) &&
168 !Attrs.intersectWith(Ty->getContext(), Other.Attrs).has_value())
169 return false;
170 return true;
171 }
172
174 return hash_combine(Value.Opcode, Value.Ty,
175 hash_combine_range(Value.VarArgs));
176 }
177};
178
180 static unsigned getHashValue(const GVNPass::Expression &E) {
181 using llvm::hash_value;
182
183 return static_cast<unsigned>(hash_value(E));
184 }
185
186 static bool isEqual(const GVNPass::Expression &LHS,
187 const GVNPass::Expression &RHS) {
188 return LHS == RHS;
189 }
190};
191
192/// Represents a particular available value that we know how to materialize.
193/// Materialization of an AvailableValue never fails. An AvailableValue is
194/// implicitly associated with a rematerialization point which is the
195/// location of the instruction from which it was formed.
197 enum class ValType {
198 SimpleVal, // A simple offsetted value that is accessed.
199 LoadVal, // A value produced by a load.
200 MemIntrin, // A memory intrinsic which is loaded from.
201 UndefVal, // A UndefValue representing a value from dead block (which
202 // is not yet physically removed from the CFG).
203 SelectVal, // A pointer select which is loaded from and for which the load
204 // can be replace by a value select.
205 };
206
207 /// Val - The value that is live out of the block.
209 /// Kind of the live-out value.
211
212 /// Offset - The byte offset in Val that is interesting for the load query.
213 unsigned Offset = 0;
214 /// V1, V2 - The dominating non-clobbered values of SelectVal.
215 Value *V1 = nullptr, *V2 = nullptr;
216
217 static AvailableValue get(Value *V, unsigned Offset = 0) {
218 AvailableValue Res;
219 Res.Val = V;
221 Res.Offset = Offset;
222 return Res;
223 }
224
225 static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
226 AvailableValue Res;
227 Res.Val = MI;
229 Res.Offset = Offset;
230 return Res;
231 }
232
233 static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) {
234 AvailableValue Res;
235 Res.Val = Load;
237 Res.Offset = Offset;
238 return Res;
239 }
240
242 AvailableValue Res;
243 Res.Val = nullptr;
245 Res.Offset = 0;
246 return Res;
247 }
248
250 AvailableValue Res;
251 Res.Val = Sel;
253 Res.Offset = 0;
254 Res.V1 = V1;
255 Res.V2 = V2;
256 return Res;
257 }
258
259 bool isSimpleValue() const { return Kind == ValType::SimpleVal; }
260 bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; }
261 bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; }
262 bool isUndefValue() const { return Kind == ValType::UndefVal; }
263 bool isSelectValue() const { return Kind == ValType::SelectVal; }
264
266 assert(isSimpleValue() && "Wrong accessor");
267 return Val;
268 }
269
271 assert(isCoercedLoadValue() && "Wrong accessor");
272 return cast<LoadInst>(Val);
273 }
274
276 assert(isMemIntrinValue() && "Wrong accessor");
277 return cast<MemIntrinsic>(Val);
278 }
279
281 assert(isSelectValue() && "Wrong accessor");
282 return cast<SelectInst>(Val);
283 }
284
285 /// Emit code at the specified insertion point to adjust the value defined
286 /// here to the specified type. This handles various coercion cases.
287 Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt) const;
288};
289
290/// Represents an AvailableValue which can be rematerialized at the end of
291/// the associated BasicBlock.
293 /// BB - The basic block in question.
294 BasicBlock *BB = nullptr;
295
296 /// AV - The actual available value.
298
301 Res.BB = BB;
302 Res.AV = std::move(AV);
303 return Res;
304 }
305
307 unsigned Offset = 0) {
308 return get(BB, AvailableValue::get(V, Offset));
309 }
310
314
316 Value *V1, Value *V2) {
317 return get(BB, AvailableValue::getSelect(Sel, V1, V2));
318 }
319
320 /// Emit code at the end of this block to adjust the value defined here to
321 /// the specified type. This handles various coercion cases.
323 return AV.MaterializeAdjustedValue(Load, BB->getTerminator());
324 }
325};
326
327//===----------------------------------------------------------------------===//
328// ValueTable Internal Functions
329//===----------------------------------------------------------------------===//
330
331GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) {
332 Expression E;
333 E.Ty = I->getType();
334 E.Opcode = I->getOpcode();
335 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(I)) {
336 // gc.relocate is 'special' call: its second and third operands are
337 // not real values, but indices into statepoint's argument list.
338 // Use the refered to values for purposes of identity.
339 E.VarArgs.push_back(lookupOrAdd(GCR->getOperand(0)));
340 E.VarArgs.push_back(lookupOrAdd(GCR->getBasePtr()));
341 E.VarArgs.push_back(lookupOrAdd(GCR->getDerivedPtr()));
342 } else {
343 for (Use &Op : I->operands())
344 E.VarArgs.push_back(lookupOrAdd(Op));
345 }
346 if (I->isCommutative()) {
347 // Ensure that commutative instructions that only differ by a permutation
348 // of their operands get the same value number by sorting the operand value
349 // numbers. Since commutative operands are the 1st two operands it is more
350 // efficient to sort by hand rather than using, say, std::sort.
351 assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
352 if (E.VarArgs[0] > E.VarArgs[1])
353 std::swap(E.VarArgs[0], E.VarArgs[1]);
354 E.Commutative = true;
355 }
356
357 if (auto *C = dyn_cast<CmpInst>(I)) {
358 // Sort the operand value numbers so x<y and y>x get the same value number.
359 CmpInst::Predicate Predicate = C->getPredicate();
360 if (E.VarArgs[0] > E.VarArgs[1]) {
361 std::swap(E.VarArgs[0], E.VarArgs[1]);
363 }
364 E.Opcode = (C->getOpcode() << 8) | Predicate;
365 E.Commutative = true;
366 } else if (auto *IVI = dyn_cast<InsertValueInst>(I)) {
367 E.VarArgs.append(IVI->idx_begin(), IVI->idx_end());
368 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
369 ArrayRef<int> ShuffleMask = SVI->getShuffleMask();
370 E.VarArgs.append(ShuffleMask.begin(), ShuffleMask.end());
371 } else if (auto *CB = dyn_cast<CallBase>(I)) {
372 E.Attrs = CB->getAttributes();
373 }
374
375 return E;
376}
377
378GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
379 unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
380 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
381 "Not a comparison!");
384 E.VarArgs.push_back(lookupOrAdd(LHS));
385 E.VarArgs.push_back(lookupOrAdd(RHS));
386
387 // Sort the operand value numbers so x<y and y>x get the same value number.
388 if (E.VarArgs[0] > E.VarArgs[1]) {
389 std::swap(E.VarArgs[0], E.VarArgs[1]);
391 }
392 E.Opcode = (Opcode << 8) | Predicate;
393 E.Commutative = true;
394 return E;
395}
396
397GVNPass::Expression
398GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
399 assert(EI && "Not an ExtractValueInst?");
401 E.Ty = EI->getType();
402 E.Opcode = 0;
403
404 WithOverflowInst *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand());
405 if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
406 // EI is an extract from one of our with.overflow intrinsics. Synthesize
407 // a semantically equivalent expression instead of an extract value
408 // expression.
409 E.Opcode = WO->getBinaryOp();
410 E.VarArgs.push_back(lookupOrAdd(WO->getLHS()));
411 E.VarArgs.push_back(lookupOrAdd(WO->getRHS()));
412 return E;
413 }
414
415 // Not a recognised intrinsic. Fall back to producing an extract value
416 // expression.
417 E.Opcode = EI->getOpcode();
418 for (Use &Op : EI->operands())
419 E.VarArgs.push_back(lookupOrAdd(Op));
420
421 append_range(E.VarArgs, EI->indices());
422
423 return E;
424}
425
426GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
428 Type *PtrTy = GEP->getType()->getScalarType();
429 const DataLayout &DL = GEP->getDataLayout();
430 unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy);
431 SmallMapVector<Value *, APInt, 4> VariableOffsets;
432 APInt ConstantOffset(BitWidth, 0);
433 if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
434 // Convert into offset representation, to recognize equivalent address
435 // calculations that use different type encoding.
436 LLVMContext &Context = GEP->getContext();
437 E.Opcode = GEP->getOpcode();
438 E.Ty = nullptr;
439 E.VarArgs.push_back(lookupOrAdd(GEP->getPointerOperand()));
440 for (const auto &[V, Scale] : VariableOffsets) {
441 E.VarArgs.push_back(lookupOrAdd(V));
442 E.VarArgs.push_back(lookupOrAdd(ConstantInt::get(Context, Scale)));
443 }
444 if (!ConstantOffset.isZero())
445 E.VarArgs.push_back(
446 lookupOrAdd(ConstantInt::get(Context, ConstantOffset)));
447 } else {
448 // If converting to offset representation fails (for scalable vectors),
449 // fall back to type-based implementation.
450 E.Opcode = GEP->getOpcode();
451 E.Ty = GEP->getSourceElementType();
452 for (Use &Op : GEP->operands())
453 E.VarArgs.push_back(lookupOrAdd(Op));
454 }
455 return E;
456}
457
458//===----------------------------------------------------------------------===//
459// ValueTable External Functions
460//===----------------------------------------------------------------------===//
461
462GVNPass::ValueTable::ValueTable() = default;
463GVNPass::ValueTable::ValueTable(const ValueTable &) = default;
464GVNPass::ValueTable::ValueTable(ValueTable &&) = default;
465GVNPass::ValueTable::~ValueTable() = default;
466GVNPass::ValueTable &
467GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default;
468
469/// add - Insert a value into the table with a specified value number.
470void GVNPass::ValueTable::add(Value *V, uint32_t Num) {
471 ValueNumbering.insert(std::make_pair(V, Num));
472 if (PHINode *PN = dyn_cast<PHINode>(V))
473 NumberingPhi[Num] = PN;
474}
475
476/// Include the incoming memory state into the hash of the expression for the
477/// given instruction. If the incoming memory state is:
478/// * LiveOnEntry, add the value number of the entry block,
479/// * a MemoryPhi, add the value number of the basic block corresponding to that
480/// MemoryPhi,
481/// * a MemoryDef, add the value number of the memory setting instruction.
482void GVNPass::ValueTable::addMemoryStateToExp(Instruction *I, Expression &Exp) {
483 assert(MSSA && "addMemoryStateToExp should not be called without MemorySSA");
484 assert(MSSA->getMemoryAccess(I) && "Instruction does not access memory");
485 MemoryAccess *MA = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(I);
486 Exp.VarArgs.push_back(lookupOrAdd(MA));
487}
488
489uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
490 // FIXME: Currently the calls which may access the thread id may
491 // be considered as not accessing the memory. But this is
492 // problematic for coroutines, since coroutines may resume in a
493 // different thread. So we disable the optimization here for the
494 // correctness. However, it may block many other correct
495 // optimizations. Revert this one when we detect the memory
496 // accessing kind more precisely.
497 if (C->getFunction()->isPresplitCoroutine()) {
498 ValueNumbering[C] = NextValueNumber;
499 return NextValueNumber++;
500 }
501
502 // Do not combine convergent calls since they implicitly depend on the set of
503 // threads that is currently executing, and they might be in different basic
504 // blocks.
505 if (C->isConvergent()) {
506 ValueNumbering[C] = NextValueNumber;
507 return NextValueNumber++;
508 }
509
510 if (AA->doesNotAccessMemory(C)) {
511 Expression Exp = createExpr(C);
512 uint32_t E = assignExpNewValueNum(Exp).first;
513 ValueNumbering[C] = E;
514 return E;
515 }
516
517 if (MD && AA->onlyReadsMemory(C)) {
518 Expression Exp = createExpr(C);
519 auto [E, IsValNumNew] = assignExpNewValueNum(Exp);
520 if (IsValNumNew) {
521 ValueNumbering[C] = E;
522 return E;
523 }
524
525 MemDepResult LocalDep = MD->getDependency(C);
526
527 if (!LocalDep.isDef() && !LocalDep.isNonLocal()) {
528 ValueNumbering[C] = NextValueNumber;
529 return NextValueNumber++;
530 }
531
532 if (LocalDep.isDef()) {
533 // For masked load/store intrinsics, the local_dep may actually be
534 // a normal load or store instruction.
535 CallInst *LocalDepCall = dyn_cast<CallInst>(LocalDep.getInst());
536
537 if (!LocalDepCall || LocalDepCall->arg_size() != C->arg_size()) {
538 ValueNumbering[C] = NextValueNumber;
539 return NextValueNumber++;
540 }
541
542 for (unsigned I = 0, E = C->arg_size(); I < E; ++I) {
543 uint32_t CVN = lookupOrAdd(C->getArgOperand(I));
544 uint32_t LocalDepCallVN = lookupOrAdd(LocalDepCall->getArgOperand(I));
545 if (CVN != LocalDepCallVN) {
546 ValueNumbering[C] = NextValueNumber;
547 return NextValueNumber++;
548 }
549 }
550
551 uint32_t V = lookupOrAdd(LocalDepCall);
552 ValueNumbering[C] = V;
553 return V;
554 }
555
556 // Non-local case.
558 MD->getNonLocalCallDependency(C);
559 // FIXME: Move the checking logic to MemDep!
560 CallInst *CDep = nullptr;
561
562 // Check to see if we have a single dominating call instruction that is
563 // identical to C.
564 for (const NonLocalDepEntry &I : Deps) {
565 if (I.getResult().isNonLocal())
566 continue;
567
568 // We don't handle non-definitions. If we already have a call, reject
569 // instruction dependencies.
570 if (!I.getResult().isDef() || CDep != nullptr) {
571 CDep = nullptr;
572 break;
573 }
574
575 CallInst *NonLocalDepCall = dyn_cast<CallInst>(I.getResult().getInst());
576 // FIXME: All duplicated with non-local case.
577 if (NonLocalDepCall && DT->properlyDominates(I.getBB(), C->getParent())) {
578 CDep = NonLocalDepCall;
579 continue;
580 }
581
582 CDep = nullptr;
583 break;
584 }
585
586 if (!CDep) {
587 ValueNumbering[C] = NextValueNumber;
588 return NextValueNumber++;
589 }
590
591 if (CDep->arg_size() != C->arg_size()) {
592 ValueNumbering[C] = NextValueNumber;
593 return NextValueNumber++;
594 }
595 for (unsigned I = 0, E = C->arg_size(); I < E; ++I) {
596 uint32_t CVN = lookupOrAdd(C->getArgOperand(I));
597 uint32_t CDepVN = lookupOrAdd(CDep->getArgOperand(I));
598 if (CVN != CDepVN) {
599 ValueNumbering[C] = NextValueNumber;
600 return NextValueNumber++;
601 }
602 }
603
604 uint32_t V = lookupOrAdd(CDep);
605 ValueNumbering[C] = V;
606 return V;
607 }
608
609 if (MSSA && IsMSSAEnabled && AA->onlyReadsMemory(C)) {
610 Expression Exp = createExpr(C);
611 addMemoryStateToExp(C, Exp);
612 auto [V, _] = assignExpNewValueNum(Exp);
613 ValueNumbering[C] = V;
614 return V;
615 }
616
617 ValueNumbering[C] = NextValueNumber;
618 return NextValueNumber++;
619}
620
621/// Returns the value number for the specified load or store instruction.
622uint32_t GVNPass::ValueTable::computeLoadStoreVN(Instruction *I) {
623 if (!MSSA || !IsMSSAEnabled) {
624 ValueNumbering[I] = NextValueNumber;
625 return NextValueNumber++;
626 }
627
629 Exp.Ty = I->getType();
630 Exp.Opcode = I->getOpcode();
631 for (Use &Op : I->operands())
632 Exp.VarArgs.push_back(lookupOrAdd(Op));
633 addMemoryStateToExp(I, Exp);
634
635 auto [V, _] = assignExpNewValueNum(Exp);
636 ValueNumbering[I] = V;
637 return V;
638}
639
640/// Returns true if a value number exists for the specified value.
641bool GVNPass::ValueTable::exists(Value *V) const {
642 return ValueNumbering.contains(V);
643}
644
645uint32_t GVNPass::ValueTable::lookupOrAdd(MemoryAccess *MA) {
646 return MSSA->isLiveOnEntryDef(MA) || isa<MemoryPhi>(MA)
647 ? lookupOrAdd(MA->getBlock())
648 : lookupOrAdd(cast<MemoryUseOrDef>(MA)->getMemoryInst());
649}
650
651/// lookupOrAdd - Returns the value number for the specified value, assigning
652/// it a new number if it did not have one before.
653uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
654 auto VI = ValueNumbering.find(V);
655 if (VI != ValueNumbering.end())
656 return VI->second;
657
658 auto *I = dyn_cast<Instruction>(V);
659 if (!I) {
660 ValueNumbering[V] = NextValueNumber;
661 if (isa<BasicBlock>(V))
662 NumberingBB[NextValueNumber] = cast<BasicBlock>(V);
663 return NextValueNumber++;
664 }
665
666 Expression Exp;
667 switch (I->getOpcode()) {
668 case Instruction::Call:
669 return lookupOrAddCall(cast<CallInst>(I));
670 case Instruction::FNeg:
671 case Instruction::Add:
672 case Instruction::FAdd:
673 case Instruction::Sub:
674 case Instruction::FSub:
675 case Instruction::Mul:
676 case Instruction::FMul:
677 case Instruction::UDiv:
678 case Instruction::SDiv:
679 case Instruction::FDiv:
680 case Instruction::URem:
681 case Instruction::SRem:
682 case Instruction::FRem:
683 case Instruction::Shl:
684 case Instruction::LShr:
685 case Instruction::AShr:
686 case Instruction::And:
687 case Instruction::Or:
688 case Instruction::Xor:
689 case Instruction::ICmp:
690 case Instruction::FCmp:
691 case Instruction::Trunc:
692 case Instruction::ZExt:
693 case Instruction::SExt:
694 case Instruction::FPToUI:
695 case Instruction::FPToSI:
696 case Instruction::UIToFP:
697 case Instruction::SIToFP:
698 case Instruction::FPTrunc:
699 case Instruction::FPExt:
700 case Instruction::PtrToInt:
701 case Instruction::PtrToAddr:
702 case Instruction::IntToPtr:
703 case Instruction::AddrSpaceCast:
704 case Instruction::BitCast:
705 case Instruction::Select:
706 case Instruction::Freeze:
707 case Instruction::ExtractElement:
708 case Instruction::InsertElement:
709 case Instruction::ShuffleVector:
710 case Instruction::InsertValue:
711 Exp = createExpr(I);
712 break;
713 case Instruction::GetElementPtr:
714 Exp = createGEPExpr(cast<GetElementPtrInst>(I));
715 break;
716 case Instruction::ExtractValue:
717 Exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
718 break;
719 case Instruction::PHI:
720 ValueNumbering[V] = NextValueNumber;
721 NumberingPhi[NextValueNumber] = cast<PHINode>(V);
722 return NextValueNumber++;
723 case Instruction::Load:
724 case Instruction::Store:
725 return computeLoadStoreVN(I);
726 default:
727 ValueNumbering[V] = NextValueNumber;
728 return NextValueNumber++;
729 }
730
731 uint32_t E = assignExpNewValueNum(Exp).first;
732 ValueNumbering[V] = E;
733 return E;
734}
735
736/// Returns the value number of the specified value. Fails if
737/// the value has not yet been numbered.
738uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const {
739 auto VI = ValueNumbering.find(V);
740 if (Verify) {
741 assert(VI != ValueNumbering.end() && "Value not numbered?");
742 return VI->second;
743 }
744 return (VI != ValueNumbering.end()) ? VI->second : 0;
745}
746
747/// Returns the value number of the given comparison,
748/// assigning it a new number if it did not have one before. Useful when
749/// we deduced the result of a comparison, but don't immediately have an
750/// instruction realizing that comparison to hand.
751uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode,
752 CmpInst::Predicate Predicate,
753 Value *LHS, Value *RHS) {
754 Expression Exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
755 return assignExpNewValueNum(Exp).first;
756}
757
758/// Remove all entries from the ValueTable.
760 ValueNumbering.clear();
761 ExpressionNumbering.clear();
762 NumberingPhi.clear();
763 NumberingBB.clear();
764 PhiTranslateTable.clear();
765 NextValueNumber = 1;
766 Expressions.clear();
767 ExprIdx.clear();
768 NextExprNumber = 0;
769}
770
771/// Remove a value from the value numbering.
773 uint32_t Num = ValueNumbering.lookup(V);
774 ValueNumbering.erase(V);
775 // If V is PHINode, V <--> value number is an one-to-one mapping.
776 if (isa<PHINode>(V))
777 NumberingPhi.erase(Num);
778 else if (isa<BasicBlock>(V))
779 NumberingBB.erase(Num);
780}
781
782/// verifyRemoved - Verify that the value is removed from all internal data
783/// structures.
784void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
785 assert(!ValueNumbering.contains(V) &&
786 "Inst still occurs in value numbering map!");
787}
788
789//===----------------------------------------------------------------------===//
790// LeaderMap External Functions
791//===----------------------------------------------------------------------===//
792
793/// Push a new Value to the LeaderTable onto the list for its value number.
794void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) {
795 const auto &[It, Inserted] = NumToLeaders.try_emplace(N, V, BB, nullptr);
796 if (!Inserted) {
797 // Key already exists: insert new node after the head.
798 auto *NewSlot = TableAllocator.Allocate<LeaderListNode>();
799 new (NewSlot) LeaderListNode(V, BB, It->second.Next);
800 It->second.Next = NewSlot;
801 }
802}
803
804/// Scan the list of values corresponding to a given
805/// value number, and remove the given instruction if encountered.
806void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I,
807 const BasicBlock *BB) {
808 auto It = NumToLeaders.find(N);
809 if (It == NumToLeaders.end())
810 return;
811
812 LeaderListNode *Prev = nullptr;
813 LeaderListNode *Curr = &It->second;
814
815 while (Curr && (Curr->Entry.Val != I || Curr->Entry.BB != BB)) {
816 Prev = Curr;
817 Curr = Curr->Next;
818 }
819
820 if (!Curr)
821 return;
822
823 if (Prev) {
824 // Non-head node: unlink and destroy.
825 Prev->Next = Curr->Next;
826 Curr->~LeaderListNode();
827 TableAllocator.Deallocate<LeaderListNode>(Curr);
828 } else {
829 // Head node (stored by value in DenseMap).
830 if (!Curr->Next) {
831 // Only node; erase from map (DenseMap calls the destructor).
832 NumToLeaders.erase(It);
833 } else {
834 // Move second node's data into head, then destroy second node.
835 LeaderListNode *Next = Curr->Next;
836 Curr->Entry.Val = std::move(Next->Entry.Val);
837 Curr->Entry.BB = Next->Entry.BB;
838 Curr->Next = Next->Next;
839 Next->~LeaderListNode();
840 TableAllocator.Deallocate<LeaderListNode>(Next);
841 }
842 }
843}
844
845//===----------------------------------------------------------------------===//
846// GVN Pass
847//===----------------------------------------------------------------------===//
848
850 return Options.AllowScalarPRE.value_or(GVNEnableScalarPRE);
851}
852
854 return Options.AllowLoadPRE.value_or(GVNEnableLoadPRE);
855}
856
858 return Options.AllowLoadInLoopPRE.value_or(GVNEnableLoadInLoopPRE);
859}
860
862 return Options.AllowLoadPRESplitBackedge.value_or(
864}
865
867 return Options.AllowMemDep.value_or(GVNEnableMemDep);
868}
869
871 return Options.AllowMemorySSA.value_or(GVNEnableMemorySSA);
872}
873
875 // FIXME: The order of evaluation of these 'getResult' calls is very
876 // significant! Re-ordering these variables will cause GVN when run alone to
877 // be less effective! We should fix memdep and basic-aa to not exhibit this
878 // behavior, but until then don't change the order here.
879 auto &AC = AM.getResult<AssumptionAnalysis>(F);
880 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
881 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
882 auto &AA = AM.getResult<AAManager>(F);
883 auto *MemDep =
885 auto &LI = AM.getResult<LoopAnalysis>(F);
886 auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
887 if (isMemorySSAEnabled() && !MSSA) {
888 assert(!MemDep &&
889 "On-demand computation of MemSSA implies that MemDep is disabled!");
890 MSSA = &AM.getResult<MemorySSAAnalysis>(F);
891 }
893 bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
894 MSSA ? &MSSA->getMSSA() : nullptr);
895 if (!Changed)
896 return PreservedAnalyses::all();
900 if (MSSA)
903 return PA;
904}
905
907 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
908 static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
909 OS, MapClassName2PassName);
910
911 OS << '<';
912 if (Options.AllowScalarPRE != std::nullopt)
913 OS << (*Options.AllowScalarPRE ? "" : "no-") << "scalar-pre;";
914 if (Options.AllowLoadPRE != std::nullopt)
915 OS << (*Options.AllowLoadPRE ? "" : "no-") << "load-pre;";
916 if (Options.AllowLoadPRESplitBackedge != std::nullopt)
917 OS << (*Options.AllowLoadPRESplitBackedge ? "" : "no-")
918 << "split-backedge-load-pre;";
919 if (Options.AllowMemDep != std::nullopt)
920 OS << (*Options.AllowMemDep ? "" : "no-") << "memdep;";
921 if (Options.AllowMemorySSA != std::nullopt)
922 OS << (*Options.AllowMemorySSA ? "" : "no-") << "memoryssa";
923 OS << '>';
924}
925
927 salvageKnowledge(I, AC);
929 removeInstruction(I);
930}
931
932#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
933LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &Map) const {
934 errs() << "{\n";
935 for (const auto &[Num, Exp] : Map) {
936 errs() << Num << "\n";
937 Exp->dump();
938 }
939 errs() << "}\n";
940}
941#endif
942
943enum class AvailabilityState : char {
944 /// We know the block *is not* fully available. This is a fixpoint.
946 /// We know the block *is* fully available. This is a fixpoint.
948 /// We do not know whether the block is fully available or not,
949 /// but we are currently speculating that it will be.
950 /// If it would have turned out that the block was, in fact, not fully
951 /// available, this would have been cleaned up into an Unavailable.
953};
954
955/// Return true if we can prove that the value
956/// we're analyzing is fully available in the specified block. As we go, keep
957/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
958/// map is actually a tri-state map with the following values:
959/// 0) we know the block *is not* fully available.
960/// 1) we know the block *is* fully available.
961/// 2) we do not know whether the block is fully available or not, but we are
962/// currently speculating that it will be.
964 BasicBlock *BB,
965 DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
967 std::optional<BasicBlock *> UnavailableBB;
968
969 // The number of times we didn't find an entry for a block in a map and
970 // optimistically inserted an entry marking block as speculatively available.
971 unsigned NumNewNewSpeculativelyAvailableBBs = 0;
972
973#ifndef NDEBUG
974 SmallPtrSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
976#endif
977
978 Worklist.emplace_back(BB);
979 while (!Worklist.empty()) {
980 BasicBlock *CurrBB = Worklist.pop_back_val(); // LoadFO - depth-first!
981 // Optimistically assume that the block is Speculatively Available and check
982 // to see if we already know about this block in one lookup.
983 std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
984 FullyAvailableBlocks.try_emplace(
986 AvailabilityState &State = IV.first->second;
987
988 // Did the entry already exist for this block?
989 if (!IV.second) {
990 if (State == AvailabilityState::Unavailable) {
991 UnavailableBB = CurrBB;
992 break; // Backpropagate unavailability info.
993 }
994
995#ifndef NDEBUG
996 AvailableBBs.emplace_back(CurrBB);
997#endif
998 continue; // Don't recurse further, but continue processing worklist.
999 }
1000
1001 // No entry found for block.
1002 ++NumNewNewSpeculativelyAvailableBBs;
1003 bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;
1004
1005 // If we have exhausted our budget, mark this block as unavailable.
1006 // Also, if this block has no predecessors, the value isn't live-in here.
1007 if (OutOfBudget || pred_empty(CurrBB)) {
1008 MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
1010 UnavailableBB = CurrBB;
1011 break; // Backpropagate unavailability info.
1012 }
1013
1014 // Tentatively consider this block as speculatively available.
1015#ifndef NDEBUG
1016 NewSpeculativelyAvailableBBs.insert(CurrBB);
1017#endif
1018 // And further recurse into block's predecessors, in depth-first order!
1019 Worklist.append(pred_begin(CurrBB), pred_end(CurrBB));
1020 }
1021
1022#if LLVM_ENABLE_STATS
1023 IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
1024 NumNewNewSpeculativelyAvailableBBs);
1025#endif
1026
1027 // If the block isn't marked as fixpoint yet
1028 // (the Unavailable and Available states are fixpoints).
1029 auto MarkAsFixpointAndEnqueueSuccessors =
1030 [&](BasicBlock *BB, AvailabilityState FixpointState) {
1031 auto It = FullyAvailableBlocks.find(BB);
1032 if (It == FullyAvailableBlocks.end())
1033 return; // Never queried this block, leave as-is.
1034 switch (AvailabilityState &State = It->second) {
1037 return; // Don't backpropagate further, continue processing worklist.
1039 State = FixpointState;
1040#ifndef NDEBUG
1041 assert(NewSpeculativelyAvailableBBs.erase(BB) &&
1042 "Found a speculatively available successor leftover?");
1043#endif
1044 // Queue successors for further processing.
1045 Worklist.append(succ_begin(BB), succ_end(BB));
1046 return;
1047 }
1048 };
1049
1050 if (UnavailableBB) {
1051 // Okay, we have encountered an unavailable block.
1052 // Mark speculatively available blocks reachable from UnavailableBB as
1053 // unavailable as well. Paths are terminated when they reach blocks not in
1054 // FullyAvailableBlocks or they are not marked as speculatively available.
1055 Worklist.clear();
1056 Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB));
1057 while (!Worklist.empty())
1058 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
1060 }
1061
1062#ifndef NDEBUG
1063 Worklist.clear();
1064 for (BasicBlock *AvailableBB : AvailableBBs)
1065 Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
1066 while (!Worklist.empty())
1067 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
1069
1070 assert(NewSpeculativelyAvailableBBs.empty() &&
1071 "Must have fixed all the new speculatively available blocks.");
1072#endif
1073
1074 return !UnavailableBB;
1075}
1076
1077/// If the specified OldValue exists in ValuesPerBlock, replace its value with
1078/// NewValue.
1080 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue,
1081 Value *NewValue) {
1082 for (AvailableValueInBlock &V : ValuesPerBlock) {
1083 if (V.AV.Val == OldValue)
1084 V.AV.Val = NewValue;
1085 if (V.AV.isSelectValue()) {
1086 if (V.AV.V1 == OldValue)
1087 V.AV.V1 = NewValue;
1088 if (V.AV.V2 == OldValue)
1089 V.AV.V2 = NewValue;
1090 }
1091 }
1092}
1093
1094/// Given a set of loads specified by ValuesPerBlock,
1095/// construct SSA form, allowing us to eliminate Load. This returns the value
1096/// that should be used at Load's definition site.
1097static Value *
1100 GVNPass &GVN) {
1101 // Check for the fully redundant, dominating load case. In this case, we can
1102 // just use the dominating value directly.
1103 if (ValuesPerBlock.size() == 1 &&
1104 GVN.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
1105 Load->getParent())) {
1106 assert(!ValuesPerBlock[0].AV.isUndefValue() &&
1107 "Dead BB dominate this block");
1108 return ValuesPerBlock[0].MaterializeAdjustedValue(Load);
1109 }
1110
1111 // Otherwise, we have to construct SSA form.
1113 SSAUpdater SSAUpdate(&NewPHIs);
1114 SSAUpdate.Initialize(Load->getType(), Load->getName());
1115
1116 for (const AvailableValueInBlock &AV : ValuesPerBlock) {
1117 BasicBlock *BB = AV.BB;
1118
1119 if (AV.AV.isUndefValue())
1120 continue;
1121
1122 if (SSAUpdate.HasValueForBlock(BB))
1123 continue;
1124
1125 // If the value is the load that we will be eliminating, and the block it's
1126 // available in is the block that the load is in, then don't add it as
1127 // SSAUpdater will resolve the value to the relevant phi which may let it
1128 // avoid phi construction entirely if there's actually only one value.
1129 if (BB == Load->getParent() &&
1130 ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == Load) ||
1131 (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == Load)))
1132 continue;
1133
1134 SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(Load));
1135 }
1136
1137 // Perform PHI construction.
1138 return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent());
1139}
1140
1142 Instruction *InsertPt) const {
1143 Value *Res;
1144 Type *LoadTy = Load->getType();
1145 const DataLayout &DL = Load->getDataLayout();
1146 if (isSimpleValue()) {
1147 Res = getSimpleValue();
1148 if (Res->getType() != LoadTy) {
1149 Res = getValueForLoad(Res, Offset, LoadTy, InsertPt, Load->getFunction());
1150
1151 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
1152 << " " << *getSimpleValue() << '\n'
1153 << *Res << '\n'
1154 << "\n\n\n");
1155 }
1156 } else if (isCoercedLoadValue()) {
1157 LoadInst *CoercedLoad = getCoercedLoadValue();
1158 if (CoercedLoad->getType() == LoadTy && Offset == 0) {
1159 Res = CoercedLoad;
1160 combineMetadataForCSE(CoercedLoad, Load, false);
1161 } else {
1162 Res = getValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt,
1163 Load->getFunction());
1164 // We are adding a new user for this load, for which the original
1165 // metadata may not hold. Additionally, the new load may have a different
1166 // size and type, so their metadata cannot be combined in any
1167 // straightforward way.
1168 // Drop all metadata that is not known to cause immediate UB on violation,
1169 // unless the load has !noundef, in which case all metadata violations
1170 // will be promoted to UB.
1171 // TODO: We can combine noalias/alias.scope metadata here, because it is
1172 // independent of the load type.
1173 if (!CoercedLoad->hasMetadata(LLVMContext::MD_noundef))
1174 CoercedLoad->dropUnknownNonDebugMetadata(
1175 {LLVMContext::MD_dereferenceable,
1176 LLVMContext::MD_dereferenceable_or_null,
1177 LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group});
1178 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
1179 << " " << *getCoercedLoadValue() << '\n'
1180 << *Res << '\n'
1181 << "\n\n\n");
1182 }
1183 } else if (isMemIntrinValue()) {
1185 InsertPt, DL);
1186 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1187 << " " << *getMemIntrinValue() << '\n'
1188 << *Res << '\n'
1189 << "\n\n\n");
1190 } else if (isSelectValue()) {
1191 // Introduce a new value select for a load from an eligible pointer select.
1192 SelectInst *Sel = getSelectValue();
1193 assert(V1 && V2 && "both value operands of the select must be present");
1194 Res =
1195 SelectInst::Create(Sel->getCondition(), V1, V2, "", Sel->getIterator());
1196 // We use the DebugLoc from the original load here, as this instruction
1197 // materializes the value that would previously have been loaded.
1198 cast<SelectInst>(Res)->setDebugLoc(Load->getDebugLoc());
1199 } else {
1200 llvm_unreachable("Should not materialize value from dead block");
1201 }
1202 assert(Res && "failed to materialize?");
1203 return Res;
1204}
1205
1206static bool isLifetimeStart(const Instruction *Inst) {
1207 if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
1208 return II->getIntrinsicID() == Intrinsic::lifetime_start;
1209 return false;
1210}
1211
1212/// Assuming To can be reached from both From and Between, does Between lie on
1213/// every path from From to To?
1214static bool liesBetween(const Instruction *From, Instruction *Between,
1215 const Instruction *To, const DominatorTree *DT) {
1216 if (From->getParent() == Between->getParent())
1217 return DT->dominates(From, Between);
1219 Exclusion.insert(Between->getParent());
1220 return !isPotentiallyReachable(From, To, &Exclusion, DT);
1221}
1222
1224 const DominatorTree *DT) {
1225 Value *PtrOp = Load->getPointerOperand();
1226 if (!PtrOp->hasUseList())
1227 return nullptr;
1228
1229 Instruction *OtherAccess = nullptr;
1230
1231 for (auto *U : PtrOp->users()) {
1232 if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) {
1233 auto *I = cast<Instruction>(U);
1234 if (I->getFunction() == Load->getFunction() && DT->dominates(I, Load)) {
1235 // Use the most immediately dominating value.
1236 if (OtherAccess) {
1237 if (DT->dominates(OtherAccess, I))
1238 OtherAccess = I;
1239 else
1240 assert(U == OtherAccess || DT->dominates(I, OtherAccess));
1241 } else
1242 OtherAccess = I;
1243 }
1244 }
1245 }
1246
1247 if (OtherAccess)
1248 return OtherAccess;
1249
1250 // There is no dominating use, check if we can find a closest non-dominating
1251 // use that lies between any other potentially available use and Load.
1252 for (auto *U : PtrOp->users()) {
1253 if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) {
1254 auto *I = cast<Instruction>(U);
1255 if (I->getFunction() == Load->getFunction() &&
1256 isPotentiallyReachable(I, Load, nullptr, DT)) {
1257 if (OtherAccess) {
1258 if (liesBetween(OtherAccess, I, Load, DT)) {
1259 OtherAccess = I;
1260 } else if (!liesBetween(I, OtherAccess, Load, DT)) {
1261 // These uses are both partially available at Load were it not for
1262 // the clobber, but neither lies strictly after the other.
1263 OtherAccess = nullptr;
1264 break;
1265 } // else: keep current OtherAccess since it lies between U and
1266 // Load.
1267 } else {
1268 OtherAccess = I;
1269 }
1270 }
1271 }
1272 }
1273
1274 return OtherAccess;
1275}
1276
1277/// Try to locate the three instruction involved in a missed
1278/// load-elimination case that is due to an intervening store.
1279static void reportMayClobberedLoad(LoadInst *Load, Instruction *DepInst,
1280 const DominatorTree *DT,
1282 using namespace ore;
1283
1284 OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", Load);
1285 R << "load of type " << NV("Type", Load->getType()) << " not eliminated"
1286 << setExtraArgs();
1287
1288 const Instruction *OtherAccess = findMayClobberedPtrAccess(Load, DT);
1289 if (OtherAccess)
1290 R << " in favor of " << NV("OtherAccess", OtherAccess);
1291
1292 R << " because it is clobbered by " << NV("ClobberedBy", DepInst);
1293
1294 ORE->emit(R);
1295}
1296
1297// Find a dominating value for Loc memory location in the extended basic block
1298// (chain of basic blocks with single predecessors) starting From instruction.
1299// Returns the value from a matching load or a simple store to the same pointer.
1301 Instruction *From, AAResults *AA) {
1302 uint32_t NumVisitedInsts = 0;
1303 BasicBlock *FromBB = From->getParent();
1304 BatchAAResults BatchAA(*AA);
1305 for (BasicBlock *BB = FromBB; BB; BB = BB->getSinglePredecessor())
1306 for (auto *Inst = BB == FromBB ? From : BB->getTerminator();
1307 Inst != nullptr; Inst = Inst->getPrevNode()) {
1308 // Stop the search if limit is reached.
1309 if (++NumVisitedInsts > MaxNumVisitedInsts)
1310 return nullptr;
1311 if (isModSet(BatchAA.getModRefInfo(Inst, Loc))) {
1312 // A simple store to the exact location can forward its value.
1313 if (auto *SI = dyn_cast<StoreInst>(Inst))
1314 if (SI->isSimple() && SI->getPointerOperand() == Loc.Ptr &&
1315 SI->getValueOperand()->getType() == LoadTy)
1316 return SI->getValueOperand();
1317 return nullptr;
1318 }
1319 if (auto *LI = dyn_cast<LoadInst>(Inst))
1320 if (LI->getPointerOperand() == Loc.Ptr && LI->getType() == LoadTy)
1321 return LI;
1322 }
1323 return nullptr;
1324}
1325
1326std::optional<AvailableValue>
1327GVNPass::AnalyzeLoadAvailability(LoadInst *Load, const ReachingMemVal &Dep,
1328 Value *Address) {
1329 assert(Load->isUnordered() && "rules below are incorrect for ordered access");
1330 assert((Dep.Kind == DepKind::Def || Dep.Kind == DepKind::Clobber) &&
1331 "expected a local dependence");
1332
1333 Instruction *DepInst = Dep.Inst;
1334
1335 const DataLayout &DL = Load->getDataLayout();
1336 if (Dep.Kind == DepKind::Clobber) {
1337 // If the dependence is to a store that writes to a superset of the bits
1338 // read by the load, we can extract the bits we need for the load from the
1339 // stored value.
1340 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
1341 // Can't forward from non-atomic to atomic without violating memory model.
1342 if (Address && Load->isAtomic() <= DepSI->isAtomic()) {
1343 int Offset =
1344 analyzeLoadFromClobberingStore(Load->getType(), Address, DepSI, DL);
1345 if (Offset != -1)
1346 return AvailableValue::get(DepSI->getValueOperand(), Offset);
1347 }
1348 }
1349
1350 // Check to see if we have something like this:
1351 // load i32* P
1352 // load i8* (P+1)
1353 // if we have this, replace the later with an extraction from the former.
1354 if (LoadInst *DepLoad = dyn_cast<LoadInst>(DepInst)) {
1355 // If this is a clobber and L is the first instruction in its block, then
1356 // we have the first instruction in the entry block.
1357 // Can't forward from non-atomic to atomic without violating memory model.
1358 if (DepLoad != Load && Address &&
1359 Load->isAtomic() <= DepLoad->isAtomic()) {
1360 Type *LoadType = Load->getType();
1361 int Offset = Dep.Offset;
1362
1363 if (!isMemorySSAEnabled()) {
1364 // If MD reported clobber, check it was nested.
1365 if (canCoerceMustAliasedValueToLoad(DepLoad, LoadType,
1366 DepLoad->getFunction())) {
1367 const auto ClobberOff = MD->getClobberOffset(DepLoad);
1368 // GVN has no deal with a negative offset.
1369 Offset = (ClobberOff == std::nullopt || *ClobberOff < 0)
1370 ? -1
1371 : *ClobberOff;
1372 }
1373 } else {
1374 if (!canCoerceMustAliasedValueToLoad(DepLoad, LoadType,
1375 DepLoad->getFunction()) ||
1376 Offset < 0)
1377 Offset = -1;
1378 }
1379 if (Offset == -1)
1380 Offset =
1381 analyzeLoadFromClobberingLoad(LoadType, Address, DepLoad, DL);
1382 if (Offset != -1)
1383 return AvailableValue::getLoad(DepLoad, Offset);
1384 }
1385 }
1386
1387 // If the clobbering value is a memset/memcpy/memmove, see if we can
1388 // forward a value on from it.
1389 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
1390 if (Address && !Load->isAtomic()) {
1392 DepMI, DL);
1393 if (Offset != -1)
1394 return AvailableValue::getMI(DepMI, Offset);
1395 }
1396 }
1397
1398 // Nothing known about this clobber, have to be conservative.
1399 LLVM_DEBUG(
1400 // fast print dep, using operator<< on instruction is too slow.
1401 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1402 dbgs() << " is clobbered by " << *DepInst << '\n';);
1403 if (ORE->allowExtraAnalysis(DEBUG_TYPE))
1404 reportMayClobberedLoad(Load, DepInst, DT, ORE);
1405
1406 return std::nullopt;
1407 }
1408 assert(Dep.Kind == DepKind::Def && "follows from above");
1409
1410 // Loading the alloca -> undef.
1411 // Loading immediately after lifetime begin -> undef.
1412 if (isa<AllocaInst>(DepInst) || isLifetimeStart(DepInst))
1413 return AvailableValue::get(UndefValue::get(Load->getType()));
1414
1415 if (Constant *InitVal =
1416 getInitialValueOfAllocation(DepInst, TLI, Load->getType()))
1417 return AvailableValue::get(InitVal);
1418
1419 if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
1420 // Reject loads and stores that are to the same address but are of
1421 // different types if we have to. If the stored value is convertable to
1422 // the loaded value, we can reuse it.
1423 if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), Load->getType(),
1424 S->getFunction()))
1425 return std::nullopt;
1426
1427 // Can't forward from non-atomic to atomic without violating memory model.
1428 if (S->isAtomic() < Load->isAtomic())
1429 return std::nullopt;
1430
1431 return AvailableValue::get(S->getValueOperand());
1432 }
1433
1434 if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
1435 // If the types mismatch and we can't handle it, reject reuse of the load.
1436 // If the stored value is larger or equal to the loaded value, we can reuse
1437 // it.
1438 if (!canCoerceMustAliasedValueToLoad(LD, Load->getType(),
1439 LD->getFunction()))
1440 return std::nullopt;
1441
1442 // Can't forward from non-atomic to atomic without violating memory model.
1443 if (LD->isAtomic() < Load->isAtomic())
1444 return std::nullopt;
1445
1446 return AvailableValue::getLoad(LD);
1447 }
1448
1449 // Check if load with Addr dependent from select can be converted to select
1450 // between load values. There must be no instructions between the found
1451 // loads and DepInst that may clobber the loads.
1452 if (auto *Sel = dyn_cast<SelectInst>(DepInst)) {
1453 assert(Sel->getType() == Load->getPointerOperandType());
1454 auto Loc = MemoryLocation::get(Load);
1455 Value *V1 =
1456 findDominatingValue(Loc.getWithNewPtr(Sel->getTrueValue()),
1457 Load->getType(), DepInst, getAliasAnalysis());
1458 if (!V1)
1459 return std::nullopt;
1460 Value *V2 =
1461 findDominatingValue(Loc.getWithNewPtr(Sel->getFalseValue()),
1462 Load->getType(), DepInst, getAliasAnalysis());
1463 if (!V2)
1464 return std::nullopt;
1465 return AvailableValue::getSelect(Sel, V1, V2);
1466 }
1467
1468 // Unknown def - must be conservative.
1469 LLVM_DEBUG(
1470 // fast print dep, using operator<< on instruction is too slow.
1471 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1472 dbgs() << " has unknown def " << *DepInst << '\n';);
1473 return std::nullopt;
1474}
1475
1476void GVNPass::AnalyzeLoadAvailability(LoadInst *Load,
1477 SmallVectorImpl<ReachingMemVal> &Deps,
1478 AvailValInBlkVect &ValuesPerBlock,
1479 UnavailBlkVect &UnavailableBlocks) {
1480 // Filter out useless results (non-locals, etc). Keep track of the blocks
1481 // where we have a value available in repl, also keep track of whether we see
1482 // dependencies that produce an unknown value for the load (such as a call
1483 // that could potentially clobber the load).
1484 for (const auto &Dep : Deps) {
1485 BasicBlock *DepBB = Dep.Block;
1486
1487 if (DeadBlocks.count(DepBB)) {
1488 // Dead dependent mem-op disguise as a load evaluating the same value
1489 // as the load in question.
1490 ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
1491 continue;
1492 }
1493
1494 if (Dep.Kind == DepKind::Other) {
1495 UnavailableBlocks.push_back(DepBB);
1496 continue;
1497 }
1498
1499 // The address being loaded in this non-local block may not be the same as
1500 // the pointer operand of the load if PHI translation occurs. Make sure
1501 // to consider the right address.
1502 if (auto AV =
1503 AnalyzeLoadAvailability(Load, Dep, const_cast<Value *>(Dep.Addr))) {
1504 // subtlety: because we know this was a non-local dependency, we know
1505 // it's safe to materialize anywhere between the instruction within
1506 // DepInfo and the end of it's block.
1507 ValuesPerBlock.push_back(
1508 AvailableValueInBlock::get(DepBB, std::move(*AV)));
1509 } else {
1510 UnavailableBlocks.push_back(DepBB);
1511 }
1512 }
1513
1514 assert(Deps.size() == ValuesPerBlock.size() + UnavailableBlocks.size() &&
1515 "post condition violation");
1516}
1517
1518/// Given the following code, v1 is partially available on some edges, but not
1519/// available on the edge from PredBB. This function tries to find if there is
1520/// another identical load in the other successor of PredBB.
1521///
1522/// v0 = load %addr
1523/// br %LoadBB
1524///
1525/// LoadBB:
1526/// v1 = load %addr
1527/// ...
1528///
1529/// PredBB:
1530/// ...
1531/// br %cond, label %LoadBB, label %SuccBB
1532///
1533/// SuccBB:
1534/// v2 = load %addr
1535/// ...
1536///
1537LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
1538 LoadInst *Load) {
1539 // For simplicity we handle a Pred has 2 successors only.
1540 auto *Term = Pred->getTerminator();
1541 if (Term->getNumSuccessors() != 2 || Term->isSpecialTerminator())
1542 return nullptr;
1543 auto *SuccBB = Term->getSuccessor(0);
1544 if (SuccBB == LoadBB)
1545 SuccBB = Term->getSuccessor(1);
1546 if (!SuccBB->getSinglePredecessor())
1547 return nullptr;
1548
1549 unsigned int NumInsts = MaxNumInsnsPerBlock;
1550 for (Instruction &Inst : *SuccBB) {
1551 if (Inst.isDebugOrPseudoInst())
1552 continue;
1553 if (--NumInsts == 0)
1554 return nullptr;
1555
1556 if (!Inst.isIdenticalTo(Load))
1557 continue;
1558
1559 bool HasLocalDep = true;
1560 if (!isMemorySSAEnabled()) {
1561 MemDepResult Dep = MD->getDependency(&Inst);
1562 HasLocalDep = !Dep.isNonLocal();
1563 } else {
1564 auto *MSSA = MSSAU->getMemorySSA();
1565 // Do not hoist if the identical load has ordering constraint.
1566 if (auto *MA = MSSA->getMemoryAccess(&Inst); MA && isa<MemoryUse>(MA)) {
1567 auto *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
1568 HasLocalDep = Clobber->getBlock() == SuccBB;
1569 }
1570 }
1571
1572 // If an identical load doesn't depends on any local instructions, it can
1573 // be safely moved to PredBB.
1574 // Also check for the implicit control flow instructions. See the comments
1575 // in PerformLoadPRE for details.
1576 if (!HasLocalDep && !ICF->isDominatedByICFIFromSameBlock(&Inst))
1577 return cast<LoadInst>(&Inst);
1578
1579 // Otherwise there is something in the same BB clobbers the memory, we can't
1580 // move this and later load to PredBB.
1581 return nullptr;
1582 }
1583
1584 return nullptr;
1585}
1586
1587void GVNPass::eliminatePartiallyRedundantLoad(
1588 LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1589 MapVector<BasicBlock *, Value *> &AvailableLoads,
1590 MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) {
1591 for (const auto &AvailableLoad : AvailableLoads) {
1592 BasicBlock *UnavailableBlock = AvailableLoad.first;
1593 Value *LoadPtr = AvailableLoad.second;
1594
1595 auto *NewLoad = new LoadInst(
1596 Load->getType(), LoadPtr, Load->getName() + ".pre", Load->isVolatile(),
1597 Load->getAlign(), Load->getOrdering(), Load->getSyncScopeID(),
1598 UnavailableBlock->getTerminator()->getIterator());
1599 NewLoad->setDebugLoc(Load->getDebugLoc());
1600 if (MSSAU) {
1601 auto *NewAccess = MSSAU->createMemoryAccessInBB(
1602 NewLoad, nullptr, NewLoad->getParent(), MemorySSA::BeforeTerminator);
1603 if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
1604 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
1605 else
1606 MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);
1607 }
1608
1609 // Transfer the old load's AA tags to the new load.
1610 AAMDNodes Tags = Load->getAAMetadata();
1611 if (Tags)
1612 NewLoad->setAAMetadata(Tags);
1613
1614 if (auto *MD = Load->getMetadata(LLVMContext::MD_invariant_load))
1615 NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
1616 if (auto *InvGroupMD = Load->getMetadata(LLVMContext::MD_invariant_group))
1617 NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
1618 if (auto *RangeMD = Load->getMetadata(LLVMContext::MD_range))
1619 NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
1620 if (auto *NoFPClassMD = Load->getMetadata(LLVMContext::MD_nofpclass))
1621 NewLoad->setMetadata(LLVMContext::MD_nofpclass, NoFPClassMD);
1622
1623 if (auto *AccessMD = Load->getMetadata(LLVMContext::MD_access_group))
1624 if (LI->getLoopFor(Load->getParent()) == LI->getLoopFor(UnavailableBlock))
1625 NewLoad->setMetadata(LLVMContext::MD_access_group, AccessMD);
1626
1627 // We do not propagate the old load's debug location, because the new
1628 // load now lives in a different BB, and we want to avoid a jumpy line
1629 // table.
1630 // FIXME: How do we retain source locations without causing poor debugging
1631 // behavior?
1632
1633 // Add the newly created load.
1634 ValuesPerBlock.push_back(
1635 AvailableValueInBlock::get(UnavailableBlock, NewLoad));
1636 if (MD)
1637 MD->invalidateCachedPointerInfo(LoadPtr);
1638 LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1639
1640 // For PredBB in CriticalEdgePredAndLoad we need to replace the uses of old
1641 // load instruction with the new created load instruction.
1642 if (CriticalEdgePredAndLoad) {
1643 auto It = CriticalEdgePredAndLoad->find(UnavailableBlock);
1644 if (It != CriticalEdgePredAndLoad->end()) {
1645 ++NumPRELoadMoved2CEPred;
1646 ICF->insertInstructionTo(NewLoad, UnavailableBlock);
1647 LoadInst *OldLoad = It->second;
1648 combineMetadataForCSE(NewLoad, OldLoad, /*DoesKMove=*/true);
1649 OldLoad->replaceAllUsesWith(NewLoad);
1650 replaceValuesPerBlockEntry(ValuesPerBlock, OldLoad, NewLoad);
1651 if (uint32_t ValNo = VN.lookup(OldLoad, false))
1652 LeaderTable.erase(ValNo, OldLoad, OldLoad->getParent());
1653 removeInstruction(OldLoad);
1654 }
1655 }
1656 }
1657
1658 // Perform PHI construction.
1659 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
1660 // ConstructSSAForLoadSet is responsible for combining metadata.
1661 ICF->removeUsersOf(Load);
1662 Load->replaceAllUsesWith(V);
1663 if (isa<PHINode>(V))
1664 V->takeName(Load);
1665 if (Instruction *I = dyn_cast<Instruction>(V))
1666 I->setDebugLoc(Load->getDebugLoc());
1667 if (MD && V->getType()->isPtrOrPtrVectorTy())
1668 MD->invalidateCachedPointerInfo(V);
1669 ORE->emit([&]() {
1670 return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load)
1671 << "load eliminated by PRE";
1672 });
1674}
1675
1676bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1677 UnavailBlkVect &UnavailableBlocks) {
1678 // Okay, we have *some* definitions of the value. This means that the value
1679 // is available in some of our (transitive) predecessors. Lets think about
1680 // doing PRE of this load. This will involve inserting a new load into the
1681 // predecessor when it's not available. We could do this in general, but
1682 // prefer to not increase code size. As such, we only do this when we know
1683 // that we only have to insert *one* load (which means we're basically moving
1684 // the load, not inserting a new one).
1685
1686 SmallPtrSet<BasicBlock *, 4> Blockers(llvm::from_range, UnavailableBlocks);
1687
1688 // Let's find the first basic block with more than one predecessor. Walk
1689 // backwards through predecessors if needed.
1690 BasicBlock *LoadBB = Load->getParent();
1691 BasicBlock *TmpBB = LoadBB;
1692
1693 // Check that there is no implicit control flow instructions above our load in
1694 // its block. If there is an instruction that doesn't always pass the
1695 // execution to the following instruction, then moving through it may become
1696 // invalid. For example:
1697 //
1698 // int arr[LEN];
1699 // int index = ???;
1700 // ...
1701 // guard(0 <= index && index < LEN);
1702 // use(arr[index]);
1703 //
1704 // It is illegal to move the array access to any point above the guard,
1705 // because if the index is out of bounds we should deoptimize rather than
1706 // access the array.
1707 // Check that there is no guard in this block above our instruction.
1708 bool MustEnsureSafetyOfSpeculativeExecution =
1709 ICF->isDominatedByICFIFromSameBlock(Load);
1710
1711 while (TmpBB->getSinglePredecessor()) {
1712 TmpBB = TmpBB->getSinglePredecessor();
1713 if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1714 return false;
1715 if (Blockers.count(TmpBB))
1716 return false;
1717
1718 // If any of these blocks has more than one successor (i.e. if the edge we
1719 // just traversed was critical), then there are other paths through this
1720 // block along which the load may not be anticipated. Hoisting the load
1721 // above this block would be adding the load to execution paths along
1722 // which it was not previously executed.
1723 if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1724 return false;
1725
1726 // Check that there is no implicit control flow in a block above.
1727 MustEnsureSafetyOfSpeculativeExecution =
1728 MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
1729 }
1730
1731 assert(TmpBB);
1732 LoadBB = TmpBB;
1733
1734 // Check to see how many predecessors have the loaded value fully
1735 // available.
1736 MapVector<BasicBlock *, Value *> PredLoads;
1737 DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
1738 for (const AvailableValueInBlock &AV : ValuesPerBlock)
1739 FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
1740 for (BasicBlock *UnavailableBB : UnavailableBlocks)
1741 FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
1742
1743 // The edge from Pred to LoadBB is a critical edge will be splitted.
1744 SmallVector<BasicBlock *, 4> CriticalEdgePredSplit;
1745 // The edge from Pred to LoadBB is a critical edge, another successor of Pred
1746 // contains a load can be moved to Pred. This data structure maps the Pred to
1747 // the movable load.
1748 MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad;
1749 for (BasicBlock *Pred : predecessors(LoadBB)) {
1750 // If any predecessor block is an EH pad that does not allow non-PHI
1751 // instructions before the terminator, we can't PRE the load.
1752 if (Pred->getTerminator()->isEHPad()) {
1753 LLVM_DEBUG(
1754 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1755 << Pred->getName() << "': " << *Load << '\n');
1756 return false;
1757 }
1758
1759 if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
1760 continue;
1761 }
1762
1763 if (Pred->getTerminator()->getNumSuccessors() != 1) {
1764 if (isa<IndirectBrInst>(Pred->getTerminator())) {
1765 LLVM_DEBUG(
1766 dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1767 << Pred->getName() << "': " << *Load << '\n');
1768 return false;
1769 }
1770
1771 if (LoadBB->isEHPad()) {
1772 LLVM_DEBUG(
1773 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1774 << Pred->getName() << "': " << *Load << '\n');
1775 return false;
1776 }
1777
1778 // Do not split backedge as it will break the canonical loop form.
1780 if (DT->dominates(LoadBB, Pred)) {
1781 LLVM_DEBUG(
1782 dbgs()
1783 << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
1784 << Pred->getName() << "': " << *Load << '\n');
1785 return false;
1786 }
1787
1788 if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load))
1789 CriticalEdgePredAndLoad[Pred] = LI;
1790 else
1791 CriticalEdgePredSplit.push_back(Pred);
1792 } else {
1793 // Only add the predecessors that will not be split for now.
1794 PredLoads[Pred] = nullptr;
1795 }
1796 }
1797
1798 // Decide whether PRE is profitable for this load.
1799 unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size();
1800 unsigned NumUnavailablePreds = NumInsertPreds +
1801 CriticalEdgePredAndLoad.size();
1802 assert(NumUnavailablePreds != 0 &&
1803 "Fully available value should already be eliminated!");
1804 (void)NumUnavailablePreds;
1805
1806 // If we need to insert new load in multiple predecessors, reject it.
1807 // FIXME: If we could restructure the CFG, we could make a common pred with
1808 // all the preds that don't have an available Load and insert a new load into
1809 // that one block.
1810 if (NumInsertPreds > 1)
1811 return false;
1812
1813 // Now we know where we will insert load. We must ensure that it is safe
1814 // to speculatively execute the load at that points.
1815 if (MustEnsureSafetyOfSpeculativeExecution) {
1816 if (CriticalEdgePredSplit.size())
1817 if (!isSafeToSpeculativelyExecute(Load, &*LoadBB->getFirstNonPHIIt(), AC,
1818 DT))
1819 return false;
1820 for (auto &PL : PredLoads)
1821 if (!isSafeToSpeculativelyExecute(Load, PL.first->getTerminator(), AC,
1822 DT))
1823 return false;
1824 for (auto &CEP : CriticalEdgePredAndLoad)
1825 if (!isSafeToSpeculativelyExecute(Load, CEP.first->getTerminator(), AC,
1826 DT))
1827 return false;
1828 }
1829
1830 // Split critical edges, and update the unavailable predecessors accordingly.
1831 for (BasicBlock *OrigPred : CriticalEdgePredSplit) {
1832 BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
1833 assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
1834 PredLoads[NewPred] = nullptr;
1835 LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
1836 << LoadBB->getName() << '\n');
1837 }
1838
1839 for (auto &CEP : CriticalEdgePredAndLoad)
1840 PredLoads[CEP.first] = nullptr;
1841
1842 // Check if the load can safely be moved to all the unavailable predecessors.
1843 bool CanDoPRE = true;
1844 const DataLayout &DL = Load->getDataLayout();
1845 SmallVector<Instruction*, 8> NewInsts;
1846 for (auto &PredLoad : PredLoads) {
1847 BasicBlock *UnavailablePred = PredLoad.first;
1848
1849 // Do PHI translation to get its value in the predecessor if necessary. The
1850 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1851 // We do the translation for each edge we skipped by going from Load's block
1852 // to LoadBB, otherwise we might miss pieces needing translation.
1853
1854 // If all preds have a single successor, then we know it is safe to insert
1855 // the load on the pred (?!?), so we can insert code to materialize the
1856 // pointer if it is not available.
1857 Value *LoadPtr = Load->getPointerOperand();
1858 BasicBlock *Cur = Load->getParent();
1859 while (Cur != LoadBB) {
1860 PHITransAddr Address(LoadPtr, DL, AC);
1861 LoadPtr = Address.translateWithInsertion(Cur, Cur->getSinglePredecessor(),
1862 *DT, NewInsts);
1863 if (!LoadPtr) {
1864 CanDoPRE = false;
1865 break;
1866 }
1867 Cur = Cur->getSinglePredecessor();
1868 }
1869
1870 if (LoadPtr) {
1871 PHITransAddr Address(LoadPtr, DL, AC);
1872 LoadPtr = Address.translateWithInsertion(LoadBB, UnavailablePred, *DT,
1873 NewInsts);
1874 }
1875 // If we couldn't find or insert a computation of this phi translated value,
1876 // we fail PRE.
1877 if (!LoadPtr) {
1878 LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1879 << *Load->getPointerOperand() << "\n");
1880 CanDoPRE = false;
1881 break;
1882 }
1883
1884 PredLoad.second = LoadPtr;
1885 }
1886
1887 if (!CanDoPRE) {
1888 while (!NewInsts.empty()) {
1889 // Erase instructions generated by the failed PHI translation before
1890 // trying to number them. PHI translation might insert instructions
1891 // in basic blocks other than the current one, and we delete them
1892 // directly, as salvageAndRemoveInstruction only allows removing from the
1893 // current basic block.
1894 NewInsts.pop_back_val()->eraseFromParent();
1895 }
1896 // HINT: Don't revert the edge-splitting as following transformation may
1897 // also need to split these critical edges.
1898 return !CriticalEdgePredSplit.empty();
1899 }
1900
1901 // Okay, we can eliminate this load by inserting a reload in the predecessor
1902 // and using PHI construction to get the value in the other predecessors, do
1903 // it.
1904 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load << '\n');
1905 LLVM_DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size()
1906 << " INSTS: " << *NewInsts.back()
1907 << '\n');
1908
1909 // Assign value numbers to the new instructions.
1910 for (Instruction *I : NewInsts) {
1911 // Instructions that have been inserted in predecessor(s) to materialize
1912 // the load address do not retain their original debug locations. Doing
1913 // so could lead to confusing (but correct) source attributions.
1914 I->updateLocationAfterHoist();
1915
1916 // FIXME: We really _ought_ to insert these value numbers into their
1917 // parent's availability map. However, in doing so, we risk getting into
1918 // ordering issues. If a block hasn't been processed yet, we would be
1919 // marking a value as AVAIL-IN, which isn't what we intend.
1920 VN.lookupOrAdd(I);
1921 }
1922
1923 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, PredLoads,
1924 &CriticalEdgePredAndLoad);
1925 ++NumPRELoad;
1926 return true;
1927}
1928
1929bool GVNPass::performLoopLoadPRE(LoadInst *Load,
1930 AvailValInBlkVect &ValuesPerBlock,
1931 UnavailBlkVect &UnavailableBlocks) {
1932 const Loop *L = LI->getLoopFor(Load->getParent());
1933 // TODO: Generalize to other loop blocks that dominate the latch.
1934 if (!L || L->getHeader() != Load->getParent())
1935 return false;
1936
1937 BasicBlock *Preheader = L->getLoopPreheader();
1938 BasicBlock *Latch = L->getLoopLatch();
1939 if (!Preheader || !Latch)
1940 return false;
1941
1942 Value *LoadPtr = Load->getPointerOperand();
1943 // Must be available in preheader.
1944 if (!L->isLoopInvariant(LoadPtr))
1945 return false;
1946
1947 // We plan to hoist the load to preheader without introducing a new fault.
1948 // In order to do it, we need to prove that we cannot side-exit the loop
1949 // once loop header is first entered before execution of the load.
1950 if (ICF->isDominatedByICFIFromSameBlock(Load))
1951 return false;
1952
1953 BasicBlock *LoopBlock = nullptr;
1954 for (auto *Blocker : UnavailableBlocks) {
1955 // Blockers from outside the loop are handled in preheader.
1956 if (!L->contains(Blocker))
1957 continue;
1958
1959 // Only allow one loop block. Loop header is not less frequently executed
1960 // than each loop block, and likely it is much more frequently executed. But
1961 // in case of multiple loop blocks, we need extra information (such as block
1962 // frequency info) to understand whether it is profitable to PRE into
1963 // multiple loop blocks.
1964 if (LoopBlock)
1965 return false;
1966
1967 // Do not sink into inner loops. This may be non-profitable.
1968 if (L != LI->getLoopFor(Blocker))
1969 return false;
1970
1971 // Blocks that dominate the latch execute on every single iteration, maybe
1972 // except the last one. So PREing into these blocks doesn't make much sense
1973 // in most cases. But the blocks that do not necessarily execute on each
1974 // iteration are sometimes much colder than the header, and this is when
1975 // PRE is potentially profitable.
1976 if (DT->dominates(Blocker, Latch))
1977 return false;
1978
1979 // Make sure that the terminator itself doesn't clobber.
1980 if (Blocker->getTerminator()->mayWriteToMemory())
1981 return false;
1982
1983 LoopBlock = Blocker;
1984 }
1985
1986 if (!LoopBlock)
1987 return false;
1988
1989 // Make sure the memory at this pointer cannot be freed, therefore we can
1990 // safely reload from it after clobber.
1991 if (LoadPtr->canBeFreed())
1992 return false;
1993
1994 // TODO: Support critical edge splitting if blocker has more than 1 successor.
1995 MapVector<BasicBlock *, Value *> AvailableLoads;
1996 AvailableLoads[LoopBlock] = LoadPtr;
1997 AvailableLoads[Preheader] = LoadPtr;
1998
1999 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
2000 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads,
2001 /*CriticalEdgePredAndLoad*/ nullptr);
2002 ++NumPRELoopLoad;
2003 return true;
2004}
2005
2008 using namespace ore;
2009
2010 ORE->emit([&]() {
2011 return OptimizationRemark(DEBUG_TYPE, "LoadElim", Load)
2012 << "load of type " << NV("Type", Load->getType()) << " eliminated"
2013 << setExtraArgs() << " in favor of "
2014 << NV("InfavorOfValue", AvailableValue);
2015 });
2016}
2017
2018/// Attempt to eliminate a load whose dependencies are
2019/// non-local by performing PHI construction.
2020bool GVNPass::processNonLocalLoad(LoadInst *Load) {
2021 // Non-local speculations are not allowed under asan.
2022 if (Load->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
2023 Load->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
2024 return false;
2025
2026 // Find the non-local dependencies of the load.
2027 LoadDepVect Deps;
2028 MD->getNonLocalPointerDependency(Load, Deps);
2029
2030 // If we had to process more than one hundred blocks to find the
2031 // dependencies, this load isn't worth worrying about. Optimizing
2032 // it will be too expensive.
2033 unsigned NumDeps = Deps.size();
2034 if (NumDeps > MaxNumDeps)
2035 return false;
2036
2038 MemVals.reserve(Deps.size());
2039
2040 for (const NonLocalDepResult &Dep : Deps) {
2041 const auto &R = Dep.getResult();
2042 Value *Address = Dep.getAddress();
2043 BasicBlock *BB = Dep.getBB();
2044 Instruction *Inst = R.getInst();
2045 if (R.isClobber())
2046 MemVals.emplace_back(ReachingMemVal::getClobber(Address, Inst));
2047 else if (R.isDef())
2048 MemVals.emplace_back(ReachingMemVal::getDef(Address, Inst));
2049 else
2050 MemVals.emplace_back(ReachingMemVal::getUnknown(BB, Address, Inst));
2051 }
2052
2053 return processNonLocalLoad(Load, MemVals);
2054}
2055
2056bool GVNPass::processNonLocalLoad(LoadInst *Load,
2057 SmallVectorImpl<ReachingMemVal> &Deps) {
2058 // If we had a phi translation failure, we'll have a single entry which is a
2059 // clobber in the current block. Reject this early.
2060 if (Deps.size() == 1 && Deps[0].Kind == DepKind::Other) {
2061 LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs());
2062 dbgs() << " has unknown dependencies\n";);
2063 return false;
2064 }
2065
2066 bool Changed = false;
2067 // This is a limited form of scalar PRE for load indices. If this load follows
2068 // a GEP, see if we can PRE the indices before analyzing.
2069 if (isScalarPREEnabled()) {
2070 if (GetElementPtrInst *GEP =
2071 dyn_cast<GetElementPtrInst>(Load->getOperand(0))) {
2072 for (Use &U : GEP->indices())
2073 if (Instruction *I = dyn_cast<Instruction>(U.get()))
2074 Changed |= performScalarPRE(I);
2075 }
2076 }
2077
2078 // Step 1: Analyze the availability of the load.
2079 AvailValInBlkVect ValuesPerBlock;
2080 UnavailBlkVect UnavailableBlocks;
2081 AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks);
2082
2083 // If we have no predecessors that produce a known value for this load, exit
2084 // early.
2085 if (ValuesPerBlock.empty())
2086 return Changed;
2087
2088 // Step 2: Eliminate fully redundancy.
2089 //
2090 // If all of the instructions we depend on produce a known value for this
2091 // load, then it is fully redundant and we can use PHI insertion to compute
2092 // its value. Insert PHIs and remove the fully redundant value now.
2093 if (UnavailableBlocks.empty()) {
2094 LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load << '\n');
2095
2096 // Perform PHI construction.
2097 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
2098 // ConstructSSAForLoadSet is responsible for combining metadata.
2099 ICF->removeUsersOf(Load);
2100 Load->replaceAllUsesWith(V);
2101
2102 if (isa<PHINode>(V))
2103 V->takeName(Load);
2104 if (Instruction *I = dyn_cast<Instruction>(V))
2105 // If instruction I has debug info, then we should not update it.
2106 // Also, if I has a null DebugLoc, then it is still potentially incorrect
2107 // to propagate Load's DebugLoc because Load may not post-dominate I.
2108 if (Load->getDebugLoc() && Load->getParent() == I->getParent())
2109 I->setDebugLoc(Load->getDebugLoc());
2110 if (MD && V->getType()->isPtrOrPtrVectorTy())
2111 MD->invalidateCachedPointerInfo(V);
2112 ++NumGVNLoad;
2113 reportLoadElim(Load, V, ORE);
2115 return true;
2116 }
2117
2118 // Step 3: Eliminate partial redundancy.
2119 if (!isLoadPREEnabled())
2120 return Changed;
2121 if (!isLoadInLoopPREEnabled() && LI->getLoopFor(Load->getParent()))
2122 return Changed;
2123
2124 if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
2125 PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks))
2126 return true;
2127
2128 return Changed;
2129}
2130
2131bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
2132 Value *V = IntrinsicI->getArgOperand(0);
2133
2134 if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
2135 if (Cond->isZero()) {
2136 Type *Int8Ty = Type::getInt8Ty(V->getContext());
2137 Type *PtrTy = PointerType::get(V->getContext(), 0);
2138 // Insert a new store to null instruction before the load to indicate that
2139 // this code is not reachable. FIXME: We could insert unreachable
2140 // instruction directly because we can modify the CFG.
2141 auto *NewS =
2142 new StoreInst(PoisonValue::get(Int8Ty), Constant::getNullValue(PtrTy),
2143 IntrinsicI->getIterator());
2144 if (MSSAU) {
2145 const MemoryUseOrDef *FirstNonDom = nullptr;
2146 const auto *AL =
2147 MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent());
2148
2149 // If there are accesses in the current basic block, find the first one
2150 // that does not come before NewS. The new memory access is inserted
2151 // after the found access or before the terminator if no such access is
2152 // found.
2153 if (AL) {
2154 for (const auto &Acc : *AL) {
2155 if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc))
2156 if (!Current->getMemoryInst()->comesBefore(NewS)) {
2157 FirstNonDom = Current;
2158 break;
2159 }
2160 }
2161 }
2162
2163 auto *NewDef =
2164 FirstNonDom ? MSSAU->createMemoryAccessBefore(
2165 NewS, nullptr,
2166 const_cast<MemoryUseOrDef *>(FirstNonDom))
2167 : MSSAU->createMemoryAccessInBB(
2168 NewS, nullptr,
2169 NewS->getParent(), MemorySSA::BeforeTerminator);
2170
2171 MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
2172 }
2173 }
2174 if (isAssumeWithEmptyBundle(*IntrinsicI)) {
2175 salvageAndRemoveInstruction(IntrinsicI);
2176 return true;
2177 }
2178 return false;
2179 }
2180
2181 if (isa<Constant>(V)) {
2182 // If it's not false, and constant, it must evaluate to true. This means our
2183 // assume is assume(true), and thus, pointless, and we don't want to do
2184 // anything more here.
2185 return false;
2186 }
2187
2188 Constant *True = ConstantInt::getTrue(V->getContext());
2189 return propagateEquality(V, True, IntrinsicI);
2190}
2191
2194 I->replaceAllUsesWith(Repl);
2195}
2196
2197/// If a load has !invariant.group, try to find the most-dominating instruction
2198/// with the same metadata and equivalent pointer (modulo bitcasts and zero
2199/// GEPs). If one is found that dominates the load, its value can be reused.
2201 Value *PointerOperand = L->getPointerOperand()->stripPointerCasts();
2202
2203 // It's not safe to walk the use list of a global value because function
2204 // passes aren't allowed to look outside their functions.
2205 // FIXME: this could be fixed by filtering instructions from outside of
2206 // current function.
2207 if (isa<Constant>(PointerOperand))
2208 return nullptr;
2209
2210 // Queue to process all pointers that are equivalent to load operand.
2211 SmallVector<Value *, 8> PointerUsesQueue;
2212 PointerUsesQueue.push_back(PointerOperand);
2213
2214 Instruction *MostDominatingInstruction = L;
2215
2216 // FIXME: This loop is potentially O(n^2) due to repeated dominates checks.
2217 while (!PointerUsesQueue.empty()) {
2218 Value *Ptr = PointerUsesQueue.pop_back_val();
2219 assert(Ptr && !isa<GlobalValue>(Ptr) &&
2220 "Null or GlobalValue should not be inserted");
2221
2222 for (User *U : Ptr->users()) {
2223 auto *I = dyn_cast<Instruction>(U);
2224 if (!I || I == L || !DT.dominates(I, MostDominatingInstruction))
2225 continue;
2226
2227 // Add bitcasts and zero GEPs to queue.
2228 // TODO: Should drop bitcast?
2229 if (isa<BitCastInst>(I) ||
2231 cast<GetElementPtrInst>(I)->hasAllZeroIndices())) {
2232 PointerUsesQueue.push_back(I);
2233 continue;
2234 }
2235
2236 // If we hit a load/store with an invariant.group metadata and the same
2237 // pointer operand, we can assume that value pointed to by the pointer
2238 // operand didn't change.
2239 if (I->hasMetadata(LLVMContext::MD_invariant_group) &&
2240 Ptr == getLoadStorePointerOperand(I) && !I->isVolatile())
2241 MostDominatingInstruction = I;
2242 }
2243 }
2244
2245 return MostDominatingInstruction != L ? MostDominatingInstruction : nullptr;
2246}
2247
2248/// Return the memory location accessed by the (masked) load/store instruction
2249/// `I`, if the instruction could potentially provide a useful value for
2250/// eliminating the load.
2251static std::optional<MemoryLocation>
2253 const TargetLibraryInfo *TLI) {
2254 if (auto *LI = dyn_cast<LoadInst>(I))
2255 return MemoryLocation::get(LI);
2256
2257 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
2258 switch (II->getIntrinsicID()) {
2259 case Intrinsic::masked_load:
2260 return MemoryLocation::getForArgument(II, 0, TLI);
2261 case Intrinsic::masked_store:
2262 if (AllowStores)
2263 return MemoryLocation::getForArgument(II, 1, TLI);
2264 return std::nullopt;
2265 default:
2266 break;
2267 }
2268 }
2269
2270 if (!AllowStores)
2271 return std::nullopt;
2272
2273 if (auto *SI = dyn_cast<StoreInst>(I))
2274 return MemoryLocation::get(SI);
2275 return std::nullopt;
2276}
2277
2278/// Scan the users of each MemoryAccess in `ClobbersList` that belong to `BB`,
2279/// looking for memory reads whose location aliases `Loc` and dominates our
2280/// load.
2281std::optional<GVNPass::ReachingMemVal> GVNPass::scanMemoryAccessesUsers(
2282 const MemoryLocation &Loc, bool IsInvariantLoad, BasicBlock *BB,
2283 const SmallVectorImpl<MemoryAccess *> &ClobbersList, MemorySSA &MSSA,
2284 BatchAAResults &AA, LoadInst *L) {
2285
2286 // Prefer a candidate that is closer to the load within the same block.
2287 auto UpdateChoice = [&](std::optional<ReachingMemVal> &Choice,
2288 AliasResult &AR, Instruction *Candidate) {
2289 if (!Choice) {
2290 if (AR == AliasResult::PartialAlias)
2291 Choice = ReachingMemVal::getClobber(Loc.Ptr, Candidate, AR.getOffset());
2292 else
2293 Choice = ReachingMemVal::getDef(Loc.Ptr, Candidate);
2294 return;
2295 }
2296 if (!MSSA.locallyDominates(MSSA.getMemoryAccess(Choice->Inst),
2297 MSSA.getMemoryAccess(Candidate)))
2298 return;
2299
2300 if (AR == AliasResult::PartialAlias) {
2301 Choice->Kind = DepKind::Clobber;
2302 Choice->Offset = AR.getOffset();
2303 } else {
2304 Choice->Kind = DepKind::Def;
2305 Choice->Offset = -1;
2306 }
2307
2308 Choice->Inst = Candidate;
2309 Choice->Block = Candidate->getParent();
2310 };
2311
2312 std::optional<ReachingMemVal> ReachingVal;
2313 for (MemoryAccess *MA : ClobbersList) {
2314 unsigned Scanned = 0;
2315 for (User *U : MA->users()) {
2316 if (++Scanned >= ScanUsersLimit)
2317 return ReachingMemVal::getUnknown(BB, Loc.Ptr);
2318
2319 auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U);
2320 if (!UseOrDef || UseOrDef->getBlock() != BB)
2321 continue;
2322
2323 Instruction *MemI = UseOrDef->getMemoryInst();
2324 if (MemI == L ||
2325 (L && !MSSA.locallyDominates(UseOrDef, MSSA.getMemoryAccess(L))))
2326 continue;
2327
2328 if (auto MaybeLoc = maybeLoadStoreLocation(MemI, IsInvariantLoad, TLI)) {
2329 AliasResult AR = AA.alias(*MaybeLoc, Loc);
2330 // If the locations do not certainly alias, we cannot possibly infer the
2331 // following load loads the same value.
2333 continue;
2334
2335 // Locations partially overlap, but neither is a subset of the other, or
2336 // the second location is before the first.
2337 if (AR == AliasResult::PartialAlias &&
2338 (!AR.hasOffset() || AR.getOffset() < 0))
2339 continue;
2340
2341 // Found candidate, the new load memory location and the given location
2342 // must alias: precise overlap, or subset with non-negative offset.
2343 UpdateChoice(ReachingVal, AR, MemI);
2344 }
2345 }
2346 if (ReachingVal)
2347 break;
2348 }
2349
2350 return ReachingVal;
2351}
2352
2353/// Check if a given MemoryAccess (usually a MemoryDef) actually modifies a
2354/// given location. Returns a ReachingMemVal describing the dependency.
2355std::optional<GVNPass::ReachingMemVal> GVNPass::accessMayModifyLocation(
2356 MemoryAccess *ClobberMA, const MemoryLocation &Loc, bool IsInvariantLoad,
2357 BasicBlock *BB, MemorySSA &MSSA, BatchAAResults &AA) {
2358 assert(ClobberMA->getBlock() == BB);
2359
2360 // If the clobbering access is the entry memory state, we cannot say anything
2361 // about the content of the memory, except when we are accessing a local
2362 // object, which can be turned later into producing `undef`.
2363 if (MSSA.isLiveOnEntryDef(ClobberMA)) {
2365 if (Alloc->getParent() == BB)
2366 return ReachingMemVal::getDef(Loc.Ptr, const_cast<AllocaInst *>(Alloc));
2367 return ReachingMemVal::getUnknown(BB, Loc.Ptr);
2368 }
2369
2370 // Loads from "constant" memory can't be clobbered.
2371 if (IsInvariantLoad || AA.pointsToConstantMemory(Loc))
2372 return std::nullopt;
2373
2374 auto GetOrdering = [](const Instruction *I) {
2375 if (auto *L = dyn_cast<LoadInst>(I))
2376 return L->getOrdering();
2377 return cast<StoreInst>(I)->getOrdering();
2378 };
2379 Instruction *ClobberI = cast<MemoryDef>(ClobberMA)->getMemoryInst();
2380
2381 // Check if the clobbering access is a load or a store that we can reuse.
2382 if (auto MaybeLoc = maybeLoadStoreLocation(ClobberI, true, TLI)) {
2383 AliasResult AR = AA.alias(*MaybeLoc, Loc);
2384 if (AR == AliasResult::MustAlias)
2385 return ReachingMemVal::getDef(Loc.Ptr, ClobberI);
2386
2387 if (AR == AliasResult::NoAlias) {
2388 // If the locations do not alias we may still be able to skip over the
2389 // clobbering instruction, even if it is atomic.
2390 // The original load is either non-atomic or unordered. We can reorder
2391 // these across non-atomic, unordered or monotonic loads or across any
2392 // store.
2393 if (!ClobberI->isAtomic() ||
2394 !isStrongerThan(GetOrdering(ClobberI), AtomicOrdering::Monotonic) ||
2395 isa<StoreInst>(ClobberI))
2396 return std::nullopt;
2397 return ReachingMemVal::getClobber(Loc.Ptr, ClobberI);
2398 }
2399
2400 // Skip over volatile loads (the original load is non-volatile, non-atomic).
2401 if (!ClobberI->isAtomic() && isa<LoadInst>(ClobberI))
2402 return std::nullopt;
2403
2404 if (AR == AliasResult::MayAlias ||
2406 (!AR.hasOffset() || AR.getOffset() < 0)))
2407 return ReachingMemVal::getClobber(Loc.Ptr, ClobberI);
2408
2409 // The only option left is a store of the superset of the required bits.
2411 AR.getOffset() > 0 &&
2412 "Must be the superset/partial overlap case with positive offset");
2413 return ReachingMemVal::getClobber(Loc.Ptr, ClobberI, AR.getOffset());
2414 }
2415
2416 if (auto *II = dyn_cast<IntrinsicInst>(ClobberI)) {
2418 return std::nullopt;
2419 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
2420 MemoryLocation IIObjLoc = MemoryLocation::getForArgument(II, 0, TLI);
2421 if (AA.isMustAlias(IIObjLoc, Loc))
2422 return ReachingMemVal::getDef(Loc.Ptr, ClobberI);
2423 return std::nullopt;
2424 }
2425 }
2426
2427 // If we are at a malloc-like function call, we can turn the load into `undef`
2428 // or zero.
2429 if (isNoAliasCall(ClobberI)) {
2430 const Value *Obj = getUnderlyingObject(Loc.Ptr);
2431 if (Obj == ClobberI || AA.isMustAlias(ClobberI, Loc.Ptr))
2432 return ReachingMemVal::getDef(Loc.Ptr, ClobberI);
2433 }
2434
2435 // Can reorder loads across a release fence.
2436 if (auto *FI = dyn_cast<FenceInst>(ClobberI))
2437 if (FI->getOrdering() == AtomicOrdering::Release)
2438 return std::nullopt;
2439
2440 // See if the clobber instruction (e.g., a generic call) may modify the
2441 // location.
2442 ModRefInfo MR = AA.getModRefInfo(ClobberI, Loc);
2443 // If may modify the location, analyze deeper, to exclude accesses to
2444 // non-escaping local allocations.
2445 if (MR == ModRefInfo::NoModRef || MR == ModRefInfo::Ref)
2446 return std::nullopt;
2447
2448 // Conservatively assume the clobbering memory access may overwrite the
2449 // location.
2450 return ReachingMemVal::getClobber(Loc.Ptr, ClobberI);
2451}
2452
2453/// Collect the predecessors of block, while doing phi-translation of the memory
2454/// address and the memory clobber. Return false if the block should be marked
2455/// as clobbering the memory location in an unknown way.
2456bool GVNPass::collectPredecessors(BasicBlock *BB, const PHITransAddr &Addr,
2457 MemoryAccess *ClobberMA,
2458 DependencyBlockSet &Blocks,
2459 SmallVectorImpl<BasicBlock *> &Worklist) {
2460 if (Addr.needsPHITranslationFromBlock(BB) &&
2462 return false;
2463
2464 auto *MPhi =
2465 ClobberMA->getBlock() == BB ? dyn_cast<MemoryPhi>(ClobberMA) : nullptr;
2467 for (BasicBlock *Pred : predecessors(BB)) {
2468 // Skip unreachable predecessors.
2469 if (!DT->isReachableFromEntry(Pred))
2470 continue;
2471
2472 // Skip already visited predecessors.
2473 if (llvm::any_of(Preds, [Pred](const auto &P) { return P.first == Pred; }))
2474 continue;
2475
2476 PHITransAddr TransAddr = Addr;
2477 if (TransAddr.needsPHITranslationFromBlock(BB))
2478 TransAddr.translateValue(BB, Pred, DT, false);
2479
2480 auto It = Blocks.find(Pred);
2481 if (It != Blocks.end()) {
2482 // If we reach a visited block with a different address, set the
2483 // current block as clobbering the memory location in an unknown way
2484 // (by returning false).
2485 if (It->second.Addr.getAddr() != TransAddr.getAddr())
2486 return false;
2487 // Otherwise, just stop the traversal.
2488 continue;
2489 }
2490
2491 Preds.emplace_back(
2492 Pred, DependencyBlockInfo(TransAddr,
2493 MPhi ? MPhi->getIncomingValueForBlock(Pred)
2494 : ClobberMA));
2495 }
2496
2497 // We collected the predecessors and stored them in Preds. Now, populate the
2498 // worklist with the predecessors found, and cache the eventual translated
2499 // address for each block.
2500 for (auto &P : Preds) {
2501 [[maybe_unused]] auto It =
2502 Blocks.try_emplace(P.first, std::move(P.second)).first;
2503 Worklist.push_back(P.first);
2504 }
2505
2506 return true;
2507}
2508
2509/// Build a list of MemoryAccesses whose users could potentially alias the
2510/// memory location being queried. Starts from StartInfo's initial clobber,
2511/// walk the use-def chain to the final clobber. If the chain extends beyond
2512/// `BB`, continue into that block but only if it is in the previously collected
2513/// set.
2514void GVNPass::collectClobberList(SmallVectorImpl<MemoryAccess *> &Clobbers,
2515 BasicBlock *BB,
2516 const DependencyBlockInfo &StartInfo,
2517 const DependencyBlockSet &Blocks,
2518 MemorySSA &MSSA) {
2519 MemoryAccess *MA = StartInfo.InitialClobberMA;
2520 MemoryAccess *LastMA = StartInfo.ClobberMA;
2521
2522 for (;;) {
2523 while (MA != LastMA) {
2524 Clobbers.push_back(MA);
2525 MA = cast<MemoryUseOrDef>(MA)->getDefiningAccess();
2526 }
2527 Clobbers.push_back(MA);
2528
2529 if (MSSA.isLiveOnEntryDef(MA) ||
2530 (MA->getBlock() == BB && !isa<MemoryPhi>(MA)))
2531 break;
2532
2533 // If the final clobber in the current block is a MemoryPhi, go to the
2534 // immediate dominator; otherwise, just get to the block containing the
2535 // final clobber.
2536 if (MA->getBlock() == BB)
2537 BB = DT->getNode(BB)->getIDom()->getBlock();
2538 else
2539 BB = MA->getBlock();
2540
2541 auto It = Blocks.find(BB);
2542 if (It == Blocks.end())
2543 break;
2544
2545 MA = It->second.InitialClobberMA;
2546 LastMA = It->second.ClobberMA;
2547 if (MA == Clobbers.back())
2548 Clobbers.pop_back();
2549 }
2550}
2551
2552/// Entrypoint for the MemorySSA-based redundant load elimination algorithm.
2553/// Given as input a load instruction, the function computes the set of reaching
2554/// memory values, one per predecessor path, that AnalyzeLoadAvailability can
2555/// later use to establish whether the load may be eliminated. A reaching value
2556/// may be of the following descriptor kind:
2557/// * Def: a precise instruction that produces the exact bits the load would
2558/// read (e.g., an equivalent load or a MustAlias store);
2559/// * Clobber: a write that clobbers a superset of the bits the load would read
2560/// (e.g., a memset over a larger region);
2561/// * Other: we know which block defines the memory location in some way, but
2562/// could not identify a precise instruction (e.g., memory already live at
2563/// function entry).
2564bool GVNPass::findReachingValuesForLoad(LoadInst *L,
2565 SmallVectorImpl<ReachingMemVal> &Values,
2566 MemorySSA &MSSA, AAResults &AAR) {
2567 EarliestEscapeAnalysis EA(*DT, LI);
2568 BatchAAResults AA(AAR, &EA);
2569 BasicBlock *StartBlock = L->getParent();
2570 bool IsInvariantLoad = L->hasMetadata(LLVMContext::MD_invariant_load);
2571 // TODO: Simplify later work by just getClobberingMemoryAccess().
2572 MemoryAccess *ClobberMA = MSSA.getMemoryAccess(L)->getDefiningAccess();
2573 const MemoryLocation Loc = MemoryLocation::get(L);
2574
2575 // Fast path for load tagged with !invariant.group.
2576 if (L->hasMetadata(LLVMContext::MD_invariant_group)) {
2577 if (Instruction *G = findInvariantGroupValue(L, *DT)) {
2578 Values.emplace_back(
2579 ReachingMemVal::getDef(getLoadStorePointerOperand(G), G));
2580 return true;
2581 }
2582 }
2583
2584 // Phase 1. First off, look for a local dependency to avoid having to
2585 // disambiguate between before the load and after the load of the starting
2586 // block (as the load may be visited from a backedge).
2587 do {
2588 // Scan users of the clobbering memory access.
2589 if (auto RMV = scanMemoryAccessesUsers(
2590 Loc, IsInvariantLoad, StartBlock,
2591 SmallVector<MemoryAccess *, 1>{ClobberMA}, MSSA, AA, L)) {
2592 Values.emplace_back(*RMV);
2593 return true;
2594 }
2595
2596 // Exit from here, and proceed visiting predecessors if the clobbering
2597 // access is non-local or is a MemoryPhi.
2598 if (ClobberMA->getBlock() != StartBlock || isa<MemoryPhi>(ClobberMA))
2599 break;
2600
2601 // Check if the clobber actually aliases the load location.
2602 if (auto RMV = accessMayModifyLocation(ClobberMA, Loc, IsInvariantLoad,
2603 StartBlock, MSSA, AA)) {
2604 Values.emplace_back(*RMV);
2605 return true;
2606 }
2607
2608 // It may happen that the clobbering memory access does not actually
2609 // clobber our load location, transition to its defining memory access.
2610 ClobberMA = cast<MemoryUseOrDef>(ClobberMA)->getDefiningAccess();
2611 } while (ClobberMA->getBlock() == StartBlock);
2612
2613 // Non-local speculations are not allowed under ASan.
2614 if (L->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
2615 L->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
2616 return false;
2617
2618 // Phase 2. Walk backwards through the CFG, collecting all the blocks that
2619 // contain an instruction that modifies the load memory location, or that lie
2620 // on a path between a clobbering block and our load. Start off by collecting
2621 // the predecessors of `StartBlock`. All the visited blocks are stored in a
2622 // the set `Blocks`. If possible, the memory address maintained for the block
2623 // visited does get phi-translated.
2624 DependencyBlockSet Blocks;
2625 SmallVector<BasicBlock *, 16> InitialWorklist;
2626 const DataLayout &DL = L->getModule()->getDataLayout();
2627 if (!collectPredecessors(StartBlock,
2628 PHITransAddr(L->getPointerOperand(), DL, AC),
2629 ClobberMA, Blocks, InitialWorklist))
2630 return false;
2631
2632 // Do a bottom-up DFS.
2633 auto Worklist = InitialWorklist;
2634 while (!Worklist.empty()) {
2635 auto *BB = Worklist.pop_back_val();
2636 DependencyBlockInfo &Info = Blocks.find(BB)->second;
2637
2638 // Phi-translation may have failed.
2639 if (!Info.Addr.getAddr())
2640 continue;
2641
2642 // If the clobbering memory access is in the current block and it indeed
2643 // clobbers our load location, record the dependency and do not visit the
2644 // predecessors of this block further, continue with the blocks in the
2645 // worklist.
2646 if (Info.ClobberMA->getBlock() == BB && !isa<MemoryPhi>(Info.ClobberMA)) {
2647 if (auto RMV = accessMayModifyLocation(
2648 Info.ClobberMA, Loc.getWithNewPtr(Info.Addr.getAddr()),
2649 IsInvariantLoad, BB, MSSA, AA)) {
2650 Info.MemVal = RMV;
2651 continue;
2652 }
2653 assert(!MSSA.isLiveOnEntryDef(Info.ClobberMA) &&
2654 "LiveOnEntry aliases everything");
2655
2656 // If, however, the clobbering memory access does not actually clobber
2657 // our load location, transition to its defining memory access, but
2658 // keep examining the same basic block.
2659 Info.ClobberMA =
2660 cast<MemoryUseOrDef>(Info.ClobberMA)->getDefiningAccess();
2661 Worklist.emplace_back(BB);
2662 continue;
2663 }
2664
2665 // At this point we know the current block is "transparent", i.e. the memory
2666 // location is not modified when execution goes through this block.
2667 // Continue to its predecessors, unless a predecessor has already been
2668 // visited with a different address. We currently cannot represent such a
2669 // dependency.
2670 if (BB == StartBlock && Info.Addr.getAddr() != L->getPointerOperand()) {
2671 Info.ForceUnknown = true;
2672 continue;
2673 }
2674 if (BB != StartBlock &&
2675 !collectPredecessors(BB, Info.Addr, Info.ClobberMA, Blocks, Worklist))
2676 Info.ForceUnknown = true;
2677 }
2678
2679 // Phase 3. We have collected all the blocks that either write a value to the
2680 // memory location of the load, or there exists a path to the load, along
2681 // which the memory location is not modified. Perform a second DFS to find
2682 // load-to-load dependencies; namely, look at the dominating memory reads,
2683 // that alias our load. These are the MemoryUses that are users of the
2684 // MemoryDefs we previously identified. If no memory read is encountered,
2685 // either confirm the clobbering write found before or set to unknown.
2686 Worklist = InitialWorklist;
2687 for (BasicBlock *BB : Worklist) {
2688 DependencyBlockInfo &Info = Blocks.find(BB)->second;
2689 Info.Visited = true;
2690 }
2691
2693 while (!Worklist.empty()) {
2694 auto *BB = Worklist.pop_back_val();
2695 DependencyBlockInfo &Info = Blocks.find(BB)->second;
2696
2697 // If phi-translation failed, assume the memory location is modified in
2698 // unknown way.
2699 if (!Info.Addr.getAddr()) {
2700 Values.push_back(ReachingMemVal::getUnknown(BB, nullptr));
2701 continue;
2702 }
2703
2704 Clobbers.clear();
2705 collectClobberList(Clobbers, BB, Info, Blocks, MSSA);
2706 if (auto RMV =
2707 scanMemoryAccessesUsers(Loc.getWithNewPtr(Info.Addr.getAddr()),
2708 IsInvariantLoad, BB, Clobbers, MSSA, AA)) {
2709 Values.push_back(*RMV);
2710 continue;
2711 }
2712
2713 // If no reusable memory use was found, and the current block is not
2714 // transparent, use the already established memory def.
2715 if (Info.MemVal) {
2716 Values.push_back(*Info.MemVal);
2717 continue;
2718 }
2719
2720 if (Info.ForceUnknown) {
2721 Values.push_back(ReachingMemVal::getUnknown(BB, Info.Addr.getAddr()));
2722 continue;
2723 }
2724
2725 // If the current block is transparent, continue to its predecessors.
2726 for (BasicBlock *Pred : predecessors(BB)) {
2727 auto It = Blocks.find(Pred);
2728 if (It == Blocks.end())
2729 continue;
2730 DependencyBlockInfo &PredInfo = It->second;
2731 if (PredInfo.Visited)
2732 continue;
2733 PredInfo.Visited = true;
2734 Worklist.push_back(Pred);
2735 }
2736 }
2737
2738 return true;
2739}
2740
2741/// Attempt to eliminate a load, first by eliminating it
2742/// locally, and then attempting non-local elimination if that fails.
2743bool GVNPass::processLoad(LoadInst *L) {
2744 if (!MD && !isMemorySSAEnabled())
2745 return false;
2746
2747 // This code hasn't been audited for ordered or volatile memory access.
2748 if (!L->isUnordered())
2749 return false;
2750
2751 if (L->getType()->isTokenLikeTy())
2752 return false;
2753
2754 if (L->use_empty()) {
2756 return true;
2757 }
2758
2759 ReachingMemVal MemVal = ReachingMemVal::getUnknown(nullptr, nullptr);
2760 if (!isMemorySSAEnabled()) {
2761 // ... to a pointer that has been loaded from before...
2762 MemDepResult Dep = MD->getDependency(L);
2763
2764 // If it is defined in another block, try harder.
2765 if (Dep.isNonLocal())
2766 return processNonLocalLoad(L);
2767
2768 // Only handle the local case below.
2769 if (Dep.isDef())
2770 MemVal = ReachingMemVal::getDef(L->getPointerOperand(), Dep.getInst());
2771 else if (Dep.isClobber())
2772 MemVal =
2773 ReachingMemVal::getClobber(L->getPointerOperand(), Dep.getInst());
2774 } else {
2776 if (!findReachingValuesForLoad(L, MemVals, *MSSAU->getMemorySSA(), *AA))
2777 return false; // Too many dependencies.
2778 assert(MemVals.size() && "Expected at least an unknown value");
2779 if (MemVals.size() > 1 || MemVals[0].Block != L->getParent())
2780 return processNonLocalLoad(L, MemVals);
2781
2782 MemVal = MemVals[0];
2783 }
2784
2785 if (MemVal.Kind == DepKind::Other) {
2786 // This might be a NonFuncLocal or an Unknown.
2787 LLVM_DEBUG(
2788 // fast print dep, using operator<< on instruction is too slow.
2789 dbgs() << "GVN: load "; L->printAsOperand(dbgs());
2790 dbgs() << " has unknown dependence\n";);
2791 return false;
2792 }
2793
2794 auto AV = AnalyzeLoadAvailability(L, MemVal, L->getPointerOperand());
2795 if (!AV)
2796 return false;
2797
2798 Value *AvailableValue = AV->MaterializeAdjustedValue(L, L);
2799
2800 // MaterializeAdjustedValue is responsible for combining metadata.
2801 ICF->removeUsersOf(L);
2802 L->replaceAllUsesWith(AvailableValue);
2803 if (MSSAU)
2804 MSSAU->removeMemoryAccess(L);
2805 ++NumGVNLoad;
2806 reportLoadElim(L, AvailableValue, ORE);
2808 // Tell MDA to reexamine the reused pointer since we might have more
2809 // information after forwarding it.
2810 if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
2811 MD->invalidateCachedPointerInfo(AvailableValue);
2812 return true;
2813}
2814
2815// Attempt to process masked loads which have loaded from
2816// masked stores with the same mask
2817bool GVNPass::processMaskedLoad(IntrinsicInst *I) {
2818 if (!MD)
2819 return false;
2820 MemDepResult Dep = MD->getDependency(I);
2821 Instruction *DepInst = Dep.getInst();
2822 if (!DepInst || !Dep.isLocal() || !Dep.isDef())
2823 return false;
2824
2825 Value *Mask = I->getOperand(1);
2826 Value *Passthrough = I->getOperand(2);
2827 Value *StoreVal;
2828 if (!match(DepInst,
2829 m_MaskedStore(m_Value(StoreVal), m_Value(), m_Specific(Mask))) ||
2830 StoreVal->getType() != I->getType())
2831 return false;
2832
2833 // Remove the load but generate a select for the passthrough
2834 Value *OpToForward = llvm::SelectInst::Create(Mask, StoreVal, Passthrough, "",
2835 I->getIterator());
2836
2837 ICF->removeUsersOf(I);
2838 I->replaceAllUsesWith(OpToForward);
2840 ++NumGVNLoad;
2841 return true;
2842}
2843
2844/// Return a pair the first field showing the value number of \p Exp and the
2845/// second field showing whether it is a value number newly created.
2846std::pair<uint32_t, bool>
2847GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
2848 uint32_t &E = ExpressionNumbering[Exp];
2849 bool CreateNewValNum = !E;
2850 if (CreateNewValNum) {
2851 Expressions.push_back(Exp);
2852 if (ExprIdx.size() < NextValueNumber + 1)
2853 ExprIdx.resize(NextValueNumber * 2);
2854 E = NextValueNumber;
2855 ExprIdx[NextValueNumber++] = NextExprNumber++;
2856 }
2857 return {E, CreateNewValNum};
2858}
2859
2860/// Return whether all the values related with the same \p num are
2861/// defined in \p BB.
2862bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
2863 GVNPass &GVN) {
2864 return all_of(
2865 GVN.LeaderTable.getLeaders(Num),
2866 [=](const LeaderMap::LeaderTableEntry &L) { return L.BB == BB; });
2867}
2868
2869/// Wrap phiTranslateImpl to provide caching functionality.
2870uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred,
2871 const BasicBlock *PhiBlock,
2872 uint32_t Num, GVNPass &GVN) {
2873 auto FindRes = PhiTranslateTable.find({Num, Pred});
2874 if (FindRes != PhiTranslateTable.end())
2875 return FindRes->second;
2876 uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, GVN);
2877 PhiTranslateTable.insert({{Num, Pred}, NewNum});
2878 return NewNum;
2879}
2880
2881// Return true if the value number \p Num and NewNum have equal value.
2882// Return false if the result is unknown.
2883bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
2884 const BasicBlock *Pred,
2885 const BasicBlock *PhiBlock,
2886 GVNPass &GVN) {
2887 CallInst *Call = nullptr;
2888 auto Leaders = GVN.LeaderTable.getLeaders(Num);
2889 for (const auto &Entry : Leaders) {
2890 Call = dyn_cast<CallInst>(&*Entry.Val);
2891 if (Call && Call->getParent() == PhiBlock)
2892 break;
2893 }
2894
2895 if (AA->doesNotAccessMemory(Call))
2896 return true;
2897
2898 if (!MD || !AA->onlyReadsMemory(Call))
2899 return false;
2900
2901 MemDepResult LocalDep = MD->getDependency(Call);
2902 if (!LocalDep.isNonLocal())
2903 return false;
2904
2907
2908 // Check to see if the Call has no function local clobber.
2909 for (const NonLocalDepEntry &D : Deps) {
2910 if (D.getResult().isNonFuncLocal())
2911 return true;
2912 }
2913 return false;
2914}
2915
2916/// Translate value number \p Num using phis, so that it has the values of
2917/// the phis in BB.
2918uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
2919 const BasicBlock *PhiBlock,
2920 uint32_t Num, GVNPass &GVN) {
2921 // See if we can refine the value number by looking at the PN incoming value
2922 // for the given predecessor.
2923 if (PHINode *PN = NumberingPhi[Num]) {
2924 if (PN->getParent() != PhiBlock)
2925 return Num;
2926 for (unsigned I = 0; I != PN->getNumIncomingValues(); ++I) {
2927 if (PN->getIncomingBlock(I) != Pred)
2928 continue;
2929 if (uint32_t TransVal = lookup(PN->getIncomingValue(I), false))
2930 return TransVal;
2931 }
2932 return Num;
2933 }
2934
2935 if (BasicBlock *BB = NumberingBB[Num]) {
2936 assert(MSSA && "NumberingBB is non-empty only when using MemorySSA");
2937 // Value numbers of basic blocks are used to represent memory state in
2938 // load/store instructions and read-only function calls when said state is
2939 // set by a MemoryPhi.
2940 if (BB != PhiBlock)
2941 return Num;
2942 MemoryPhi *MPhi = MSSA->getMemoryAccess(BB);
2943 for (unsigned i = 0, N = MPhi->getNumIncomingValues(); i != N; ++i) {
2944 if (MPhi->getIncomingBlock(i) != Pred)
2945 continue;
2946 MemoryAccess *MA = MPhi->getIncomingValue(i);
2947 if (auto *PredPhi = dyn_cast<MemoryPhi>(MA))
2948 return lookupOrAdd(PredPhi->getBlock());
2949 if (MSSA->isLiveOnEntryDef(MA))
2950 return lookupOrAdd(&BB->getParent()->getEntryBlock());
2951 return lookupOrAdd(cast<MemoryUseOrDef>(MA)->getMemoryInst());
2952 }
2954 "CFG/MemorySSA mismatch: predecessor not found among incoming blocks");
2955 }
2956
2957 // If there is any value related with Num is defined in a BB other than
2958 // PhiBlock, it cannot depend on a phi in PhiBlock without going through
2959 // a backedge. We can do an early exit in that case to save compile time.
2960 if (!areAllValsInBB(Num, PhiBlock, GVN))
2961 return Num;
2962
2963 if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
2964 return Num;
2965 Expression Exp = Expressions[ExprIdx[Num]];
2966
2967 for (unsigned I = 0; I < Exp.VarArgs.size(); I++) {
2968 // For InsertValue and ExtractValue, some varargs are index numbers
2969 // instead of value numbers. Those index numbers should not be
2970 // translated.
2971 if ((I > 1 && Exp.Opcode == Instruction::InsertValue) ||
2972 (I > 0 && Exp.Opcode == Instruction::ExtractValue) ||
2973 (I > 1 && Exp.Opcode == Instruction::ShuffleVector))
2974 continue;
2975 Exp.VarArgs[I] = phiTranslate(Pred, PhiBlock, Exp.VarArgs[I], GVN);
2976 }
2977
2978 if (Exp.Commutative) {
2979 assert(Exp.VarArgs.size() >= 2 && "Unsupported commutative instruction!");
2980 if (Exp.VarArgs[0] > Exp.VarArgs[1]) {
2981 std::swap(Exp.VarArgs[0], Exp.VarArgs[1]);
2982 uint32_t Opcode = Exp.Opcode >> 8;
2983 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
2984 Exp.Opcode = (Opcode << 8) |
2986 static_cast<CmpInst::Predicate>(Exp.Opcode & 255));
2987 }
2988 }
2989
2990 if (uint32_t NewNum = ExpressionNumbering[Exp]) {
2991 if (Exp.Opcode == Instruction::Call && NewNum != Num)
2992 return areCallValsEqual(Num, NewNum, Pred, PhiBlock, GVN) ? NewNum : Num;
2993 return NewNum;
2994 }
2995 return Num;
2996}
2997
2998/// Erase stale entry from phiTranslate cache so phiTranslate can be computed
2999/// again.
3000void GVNPass::ValueTable::eraseTranslateCacheEntry(
3001 uint32_t Num, const BasicBlock &CurrBlock) {
3002 for (const BasicBlock *Pred : predecessors(&CurrBlock))
3003 PhiTranslateTable.erase({Num, Pred});
3004}
3005
3006// In order to find a leader for a given value number at a
3007// specific basic block, we first obtain the list of all Values for that number,
3008// and then scan the list to find one whose block dominates the block in
3009// question. This is fast because dominator tree queries consist of only
3010// a few comparisons of DFS numbers.
3011Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t Num) {
3012 auto Leaders = LeaderTable.getLeaders(Num);
3013 if (Leaders.empty())
3014 return nullptr;
3015
3016 Value *Val = nullptr;
3017 for (const auto &Entry : Leaders) {
3018 if (DT->dominates(Entry.BB, BB)) {
3019 Val = Entry.Val;
3020 if (isa<Constant>(Val))
3021 return Val;
3022 }
3023 }
3024
3025 return Val;
3026}
3027
3028/// There is an edge from 'Src' to 'Dst'. Return
3029/// true if every path from the entry block to 'Dst' passes via this edge. In
3030/// particular 'Dst' must not be reachable via another edge from 'Src'.
3032 DominatorTree *DT) {
3033 // While in theory it is interesting to consider the case in which Dst has
3034 // more than one predecessor, because Dst might be part of a loop which is
3035 // only reachable from Src, in practice it is pointless since at the time
3036 // GVN runs all such loops have preheaders, which means that Dst will have
3037 // been changed to have only one predecessor, namely Src.
3038 const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
3039 assert((!Pred || Pred == E.getStart()) &&
3040 "No edge between these basic blocks!");
3041 return Pred != nullptr;
3042}
3043
3044void GVNPass::assignBlockRPONumber(Function &F) {
3045 BlockRPONumber.clear();
3046 uint32_t NextBlockNumber = 1;
3047 ReversePostOrderTraversal<Function *> RPOT(&F);
3048 for (BasicBlock *BB : RPOT)
3049 BlockRPONumber[BB] = NextBlockNumber++;
3050 InvalidBlockRPONumbers = false;
3051}
3052
3053/// The given values are known to be equal in every use
3054/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
3055/// 'RHS' everywhere in the scope. Returns whether a change was made.
3056/// The Root may either be a basic block edge (for conditions) or an
3057/// instruction (for assumes).
3058bool GVNPass::propagateEquality(
3059 Value *LHS, Value *RHS,
3060 const std::variant<BasicBlockEdge, Instruction *> &Root) {
3062 Worklist.push_back(std::make_pair(LHS, RHS));
3063 bool Changed = false;
3064 SmallVector<const BasicBlock *> DominatedBlocks;
3065 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) {
3066 // For speed, compute a conservative fast approximation to
3067 // DT->dominates(Root, Root.getEnd());
3069 DominatedBlocks.push_back(Edge->getEnd());
3070 } else {
3071 Instruction *I = std::get<Instruction *>(Root);
3072 for (const auto *Node : DT->getNode(I->getParent())->children())
3073 DominatedBlocks.push_back(Node->getBlock());
3074 }
3075
3076 while (!Worklist.empty()) {
3077 std::pair<Value*, Value*> Item = Worklist.pop_back_val();
3078 LHS = Item.first; RHS = Item.second;
3079
3080 if (LHS == RHS)
3081 continue;
3082 assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
3083
3084 // Don't try to propagate equalities between constants.
3086 continue;
3087
3088 // Prefer a constant on the right-hand side, or an Argument if no constants.
3090 std::swap(LHS, RHS);
3091 assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
3092 const DataLayout &DL =
3094 ? cast<Argument>(LHS)->getParent()->getDataLayout()
3095 : cast<Instruction>(LHS)->getDataLayout();
3096
3097 // If there is no obvious reason to prefer the left-hand side over the
3098 // right-hand side, ensure the longest lived term is on the right-hand side,
3099 // so the shortest lived term will be replaced by the longest lived.
3100 // This tends to expose more simplifications.
3101 uint32_t LVN = VN.lookupOrAdd(LHS);
3102 if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
3104 // Move the 'oldest' value to the right-hand side, using the value number
3105 // as a proxy for age.
3106 uint32_t RVN = VN.lookupOrAdd(RHS);
3107 if (LVN < RVN) {
3108 std::swap(LHS, RHS);
3109 LVN = RVN;
3110 }
3111 }
3112
3113 // If value numbering later sees that an instruction in the scope is equal
3114 // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
3115 // the invariant that instructions only occur in the leader table for their
3116 // own value number (this is used by removeFromLeaderTable), do not do this
3117 // if RHS is an instruction (if an instruction in the scope is morphed into
3118 // LHS then it will be turned into RHS by the next GVN iteration anyway, so
3119 // using the leader table is about compiling faster, not optimizing better).
3120 // The leader table only tracks basic blocks, not edges. Only add to if we
3121 // have the simple case where the edge dominates the end.
3123 for (const BasicBlock *BB : DominatedBlocks)
3124 LeaderTable.insert(LVN, RHS, BB);
3125
3126 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
3127 // LHS always has at least one use that is not dominated by Root, this will
3128 // never do anything if LHS has only one use.
3129 if (!LHS->hasOneUse()) {
3130 // Create a callback that captures the DL.
3131 auto CanReplacePointersCallBack = [&DL](const Use &U, const Value *To) {
3132 return canReplacePointersInUseIfEqual(U, To, DL);
3133 };
3134 unsigned NumReplacements;
3135 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root))
3136 NumReplacements = replaceDominatedUsesWithIf(
3137 LHS, RHS, *DT, *Edge, CanReplacePointersCallBack);
3138 else
3139 NumReplacements = replaceDominatedUsesWithIf(
3140 LHS, RHS, *DT, std::get<Instruction *>(Root),
3141 CanReplacePointersCallBack);
3142
3143 if (NumReplacements > 0) {
3144 Changed = true;
3145 NumGVNEqProp += NumReplacements;
3146 // Cached information for anything that uses LHS will be invalid.
3147 if (MD)
3148 MD->invalidateCachedPointerInfo(LHS);
3149 }
3150 }
3151
3152 // Now try to deduce additional equalities from this one. For example, if
3153 // the known equality was "(A != B)" == "false" then it follows that A and B
3154 // are equal in the scope. Only boolean equalities with an explicit true or
3155 // false RHS are currently supported.
3156 if (!RHS->getType()->isIntegerTy(1))
3157 // Not a boolean equality - bail out.
3158 continue;
3159 ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
3160 if (!CI)
3161 // RHS neither 'true' nor 'false' - bail out.
3162 continue;
3163 // Whether RHS equals 'true'. Otherwise it equals 'false'.
3164 bool IsKnownTrue = CI->isMinusOne();
3165 bool IsKnownFalse = !IsKnownTrue;
3166
3167 // If "A && B" is known true then both A and B are known true. If "A || B"
3168 // is known false then both A and B are known false.
3169 Value *A, *B;
3170 if ((IsKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) ||
3171 (IsKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) {
3172 Worklist.push_back(std::make_pair(A, RHS));
3173 Worklist.push_back(std::make_pair(B, RHS));
3174 continue;
3175 }
3176
3177 // If we are propagating an equality like "(A == B)" == "true" then also
3178 // propagate the equality A == B. When propagating a comparison such as
3179 // "(A >= B)" == "true", replace all instances of "A < B" with "false".
3180 if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) {
3181 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
3182
3183 // If "A == B" is known true, or "A != B" is known false, then replace
3184 // A with B everywhere in the scope. For floating point operations, we
3185 // have to be careful since equality does not always imply equivalance.
3186 if (Cmp->isEquivalence(IsKnownFalse))
3187 Worklist.push_back(std::make_pair(Op0, Op1));
3188
3189 // If "A >= B" is known true, replace "A < B" with false everywhere.
3190 CmpInst::Predicate NotPred = Cmp->getInversePredicate();
3191 Constant *NotVal = ConstantInt::get(Cmp->getType(), IsKnownFalse);
3192 // Since we don't have the instruction "A < B" immediately to hand, work
3193 // out the value number that it would have and use that to find an
3194 // appropriate instruction (if any).
3195 uint32_t NextNum = VN.getNextUnusedValueNumber();
3196 uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1);
3197 // If the number we were assigned was brand new then there is no point in
3198 // looking for an instruction realizing it: there cannot be one!
3199 if (Num < NextNum) {
3200 for (const auto &Entry : LeaderTable.getLeaders(Num)) {
3201 // Only look at leaders that either dominate the start of the edge,
3202 // or are dominated by the end. This check is not necessary for
3203 // correctness, it only discards cases for which the following
3204 // use replacement will not work anyway.
3205 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) {
3206 if (!DT->dominates(Entry.BB, Edge->getStart()) &&
3207 !DT->dominates(Edge->getEnd(), Entry.BB))
3208 continue;
3209 } else {
3210 auto *InstBB = std::get<Instruction *>(Root)->getParent();
3211 if (!DT->dominates(Entry.BB, InstBB) &&
3212 !DT->dominates(InstBB, Entry.BB))
3213 continue;
3214 }
3215
3216 Value *NotCmp = Entry.Val;
3217 if (NotCmp && isa<Instruction>(NotCmp)) {
3218 unsigned NumReplacements;
3219 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root))
3220 NumReplacements =
3221 replaceDominatedUsesWith(NotCmp, NotVal, *DT, *Edge);
3222 else
3223 NumReplacements = replaceDominatedUsesWith(
3224 NotCmp, NotVal, *DT, std::get<Instruction *>(Root));
3225 Changed |= NumReplacements > 0;
3226 NumGVNEqProp += NumReplacements;
3227 // Cached information for anything that uses NotCmp will be invalid.
3228 if (MD)
3229 MD->invalidateCachedPointerInfo(NotCmp);
3230 }
3231 }
3232 }
3233 // Ensure that any instruction in scope that gets the "A < B" value number
3234 // is replaced with false.
3235 // The leader table only tracks basic blocks, not edges. Only add to if we
3236 // have the simple case where the edge dominates the end.
3237 for (const BasicBlock *BB : DominatedBlocks)
3238 LeaderTable.insert(Num, NotVal, BB);
3239
3240 continue;
3241 }
3242
3243 // Propagate equalities that results from truncation with no unsigned wrap
3244 // like (trunc nuw i64 %v to i1) == "true" or (trunc nuw i64 %v to i1) ==
3245 // "false"
3246 if (match(LHS, m_NUWTrunc(m_Value(A)))) {
3247 Worklist.emplace_back(A, ConstantInt::get(A->getType(), IsKnownTrue));
3248 continue;
3249 }
3250
3251 if (match(LHS, m_Not(m_Value(A)))) {
3252 Worklist.emplace_back(A, ConstantInt::get(A->getType(), !IsKnownTrue));
3253 continue;
3254 }
3255 }
3256
3257 return Changed;
3258}
3259
3260/// When calculating availability, handle an instruction
3261/// by inserting it into the appropriate sets.
3262bool GVNPass::processInstruction(Instruction *I) {
3263 // If the instruction can be easily simplified then do so now in preference
3264 // to value numbering it. Value numbering often exposes redundancies, for
3265 // example if it determines that %y is equal to %x then the instruction
3266 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
3267 const DataLayout &DL = I->getDataLayout();
3268 if (Value *V = simplifyInstruction(I, {DL, TLI, DT, AC})) {
3269 bool Changed = false;
3270 if (!I->use_empty()) {
3271 // Simplification can cause a special instruction to become not special.
3272 // For example, devirtualization to a willreturn function.
3273 ICF->removeUsersOf(I);
3274 I->replaceAllUsesWith(V);
3275 Changed = true;
3276 }
3277 if (isInstructionTriviallyDead(I, TLI)) {
3279 Changed = true;
3280 }
3281 if (Changed) {
3282 if (MD && V->getType()->isPtrOrPtrVectorTy())
3283 MD->invalidateCachedPointerInfo(V);
3284 ++NumGVNSimpl;
3285 return true;
3286 }
3287 }
3288
3289 if (auto *Assume = dyn_cast<AssumeInst>(I))
3290 return processAssumeIntrinsic(Assume);
3291
3292 if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
3293 if (processLoad(Load))
3294 return true;
3295
3296 unsigned Num = VN.lookupOrAdd(Load);
3297 LeaderTable.insert(Num, Load, Load->getParent());
3298 return false;
3299 }
3300
3302 processMaskedLoad(cast<IntrinsicInst>(I)))
3303 return true;
3304
3305 // For conditional branches, we can perform simple conditional propagation on
3306 // the condition value itself.
3307 if (CondBrInst *BI = dyn_cast<CondBrInst>(I)) {
3308 if (isa<Constant>(BI->getCondition()))
3309 return processFoldableCondBr(BI);
3310
3311 Value *BranchCond = BI->getCondition();
3312 BasicBlock *TrueSucc = BI->getSuccessor(0);
3313 BasicBlock *FalseSucc = BI->getSuccessor(1);
3314 // Avoid multiple edges early.
3315 if (TrueSucc == FalseSucc)
3316 return false;
3317
3318 BasicBlock *Parent = BI->getParent();
3319 bool Changed = false;
3320
3322 BasicBlockEdge TrueE(Parent, TrueSucc);
3323 Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
3324
3326 BasicBlockEdge FalseE(Parent, FalseSucc);
3327 Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
3328
3329 return Changed;
3330 }
3331
3332 // For switches, propagate the case values into the case destinations.
3333 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
3334 Value *SwitchCond = SI->getCondition();
3335 BasicBlock *Parent = SI->getParent();
3336 bool Changed = false;
3337
3338 // Remember how many outgoing edges there are to every successor.
3339 SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
3340 for (BasicBlock *Succ : successors(Parent))
3341 ++SwitchEdges[Succ];
3342
3343 for (const auto &Case : SI->cases()) {
3344 BasicBlock *Dst = Case.getCaseSuccessor();
3345 // If there is only a single edge, propagate the case value into it.
3346 if (SwitchEdges.lookup(Dst) == 1) {
3347 BasicBlockEdge E(Parent, Dst);
3348 Changed |= propagateEquality(SwitchCond, Case.getCaseValue(), E);
3349 }
3350 }
3351 return Changed;
3352 }
3353
3354 // Instructions with void type don't return a value, so there's
3355 // no point in trying to find redundancies in them.
3356 if (I->getType()->isVoidTy())
3357 return false;
3358
3359 uint32_t NextNum = VN.getNextUnusedValueNumber();
3360 unsigned Num = VN.lookupOrAdd(I);
3361
3362 // Allocations are always uniquely numbered, so we can save time and memory
3363 // by fast failing them.
3364 if (isa<AllocaInst>(I) || I->isTerminator() || isa<PHINode>(I)) {
3365 LeaderTable.insert(Num, I, I->getParent());
3366 return false;
3367 }
3368
3369 // If the number we were assigned was a brand new VN, then we don't
3370 // need to do a lookup to see if the number already exists
3371 // somewhere in the domtree: it can't!
3372 if (Num >= NextNum) {
3373 LeaderTable.insert(Num, I, I->getParent());
3374 return false;
3375 }
3376
3377 // Perform fast-path value-number based elimination of values inherited from
3378 // dominators.
3379 Value *Repl = findLeader(I->getParent(), Num);
3380 if (!Repl) {
3381 // Failure, just remember this instance for future use.
3382 LeaderTable.insert(Num, I, I->getParent());
3383 return false;
3384 }
3385
3386 if (Repl == I) {
3387 // If I was the result of a shortcut PRE, it might already be in the table
3388 // and the best replacement for itself. Nothing to do.
3389 return false;
3390 }
3391
3392 // Remove it!
3394 if (MD && Repl->getType()->isPtrOrPtrVectorTy())
3395 MD->invalidateCachedPointerInfo(Repl);
3397 return true;
3398}
3399
3400/// runOnFunction - This is the main transformation entry point for a function.
3401bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
3402 const TargetLibraryInfo &RunTLI, AAResults &RunAA,
3403 MemoryDependenceResults *RunMD, LoopInfo &LI,
3404 OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
3405 AC = &RunAC;
3406 DT = &RunDT;
3407 VN.setDomTree(DT);
3408 TLI = &RunTLI;
3409 AA = &RunAA;
3410 VN.setAliasAnalysis(&RunAA);
3411 MD = RunMD;
3412 ImplicitControlFlowTracking ImplicitCFT;
3413 ICF = &ImplicitCFT;
3414 this->LI = &LI;
3415 VN.setMemDep(MD);
3416 // Propagate the MSSA-enabled flag so the value-numbering paths in
3417 // lookupOrAddCall() and computeLoadStoreVN(), which depends on whether
3418 // IsMSSAEnabled is turned on.
3419 VN.setMemorySSA(MSSA, isMemorySSAEnabled());
3420 ORE = RunORE;
3421 InvalidBlockRPONumbers = true;
3422 MemorySSAUpdater Updater(MSSA);
3423 MSSAU = MSSA ? &Updater : nullptr;
3424
3425 bool Changed = false;
3426 bool ShouldContinue = true;
3427
3428 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
3429 // Merge unconditional branches, allowing PRE to catch more
3430 // optimization opportunities.
3431 for (BasicBlock &BB : make_early_inc_range(F)) {
3432 bool RemovedBlock = MergeBlockIntoPredecessor(&BB, &DTU, &LI, MSSAU, MD);
3433 if (RemovedBlock)
3434 ++NumGVNBlocks;
3435
3436 Changed |= RemovedBlock;
3437 }
3438 DTU.flush();
3439
3440 unsigned Iteration = 0;
3441 while (ShouldContinue) {
3442 LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
3443 (void) Iteration;
3444 ShouldContinue = iterateOnFunction(F);
3445 Changed |= ShouldContinue;
3446 ++Iteration;
3447 }
3448
3449 if (isScalarPREEnabled()) {
3450 // Fabricate val-num for dead-code in order to suppress assertion in
3451 // performPRE().
3452 assignValNumForDeadCode();
3453 bool PREChanged = true;
3454 while (PREChanged) {
3455 PREChanged = performPRE(F);
3456 Changed |= PREChanged;
3457 }
3458 }
3459
3460 // FIXME: Should perform GVN again after PRE does something. PRE can move
3461 // computations into blocks where they become fully redundant. Note that
3462 // we can't do this until PRE's critical edge splitting updates memdep.
3463 // Actually, when this happens, we should just fully integrate PRE into GVN.
3464
3465 cleanupGlobalSets();
3466 // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
3467 // iteration.
3468 DeadBlocks.clear();
3469
3470 if (MSSA && VerifyMemorySSA)
3471 MSSA->verifyMemorySSA();
3472
3473 return Changed;
3474}
3475
3476bool GVNPass::processBlock(BasicBlock *BB) {
3477 if (DeadBlocks.count(BB))
3478 return false;
3479
3480 bool ChangedFunction = false;
3481
3482 // Since we may not have visited the input blocks of the phis, we can't
3483 // use our normal hash approach for phis. Instead, simply look for
3484 // obvious duplicates. The first pass of GVN will tend to create
3485 // identical phis, and the second or later passes can eliminate them.
3486 SmallPtrSet<PHINode *, 8> PHINodesToRemove;
3487 ChangedFunction |= EliminateDuplicatePHINodes(BB, PHINodesToRemove);
3488 for (PHINode *PN : PHINodesToRemove) {
3489 removeInstruction(PN);
3490 }
3491 for (Instruction &Inst : make_early_inc_range(*BB))
3492 ChangedFunction |= processInstruction(&Inst);
3493 return ChangedFunction;
3494}
3495
3496// Instantiate an expression in a predecessor that lacked it.
3497bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
3498 BasicBlock *Curr, unsigned int ValNo) {
3499 // Because we are going top-down through the block, all value numbers
3500 // will be available in the predecessor by the time we need them. Any
3501 // that weren't originally present will have been instantiated earlier
3502 // in this loop.
3503 bool Success = true;
3504 for (unsigned I = 0, E = Instr->getNumOperands(); I != E; ++I) {
3505 Value *Op = Instr->getOperand(I);
3507 continue;
3508 // This could be a newly inserted instruction, in which case, we won't
3509 // find a value number, and should give up before we hurt ourselves.
3510 // FIXME: Rewrite the infrastructure to let it easier to value number
3511 // and process newly inserted instructions.
3512 if (!VN.exists(Op)) {
3513 Success = false;
3514 break;
3515 }
3516 uint32_t TValNo =
3517 VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this);
3518 if (Value *V = findLeader(Pred, TValNo)) {
3519 Instr->setOperand(I, V);
3520 } else {
3521 Success = false;
3522 break;
3523 }
3524 }
3525
3526 // Fail out if we encounter an operand that is not available in
3527 // the PRE predecessor. This is typically because of loads which
3528 // are not value numbered precisely.
3529 if (!Success)
3530 return false;
3531
3532 Instr->insertBefore(Pred->getTerminator()->getIterator());
3533 Instr->setName(Instr->getName() + ".pre");
3534 Instr->setDebugLoc(Instr->getDebugLoc());
3535
3536 ICF->insertInstructionTo(Instr, Pred);
3537
3538 unsigned Num = VN.lookupOrAdd(Instr);
3539 VN.add(Instr, Num);
3540
3541 // Update the availability map to include the new instruction.
3542 LeaderTable.insert(Num, Instr, Pred);
3543 return true;
3544}
3545
3546bool GVNPass::performScalarPRE(Instruction *CurInst) {
3547 if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
3548 isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
3549 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
3550 CurInst->getType()->isTokenLikeTy())
3551 return false;
3552
3553 // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
3554 // sinking the compare again, and it would force the code generator to
3555 // move the i1 from processor flags or predicate registers into a general
3556 // purpose register.
3557 if (isa<CmpInst>(CurInst))
3558 return false;
3559
3560 // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
3561 // sinking the addressing mode computation back to its uses. Extending the
3562 // GEP's live range increases the register pressure, and therefore it can
3563 // introduce unnecessary spills.
3564 //
3565 // This doesn't prevent Load PRE. PHI translation will make the GEP available
3566 // to the load by moving it to the predecessor block if necessary.
3567 if (isa<GetElementPtrInst>(CurInst))
3568 return false;
3569
3570 if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
3571 // We don't currently value number ANY inline asm calls.
3572 if (CallB->isInlineAsm())
3573 return false;
3574 }
3575
3576 uint32_t ValNo = VN.lookup(CurInst);
3577
3578 // Look for the predecessors for PRE opportunities. We're
3579 // only trying to solve the basic diamond case, where
3580 // a value is computed in the successor and one predecessor,
3581 // but not the other. We also explicitly disallow cases
3582 // where the successor is its own predecessor, because they're
3583 // more complicated to get right.
3584 unsigned NumWith = 0;
3585 unsigned NumWithout = 0;
3586 BasicBlock *PREPred = nullptr;
3587 BasicBlock *CurrentBlock = CurInst->getParent();
3588
3589 // Update the RPO numbers for this function.
3590 if (InvalidBlockRPONumbers)
3591 assignBlockRPONumber(*CurrentBlock->getParent());
3592
3594 for (BasicBlock *P : predecessors(CurrentBlock)) {
3595 // We're not interested in PRE where blocks with predecessors that are
3596 // not reachable.
3597 if (!DT->isReachableFromEntry(P)) {
3598 NumWithout = 2;
3599 break;
3600 }
3601 // It is not safe to do PRE when P->CurrentBlock is a loop backedge.
3602 assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
3603 "Invalid BlockRPONumber map.");
3604 if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) {
3605 NumWithout = 2;
3606 break;
3607 }
3608
3609 uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
3610 Value *PredV = findLeader(P, TValNo);
3611 if (!PredV) {
3612 PredMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
3613 PREPred = P;
3614 ++NumWithout;
3615 } else if (PredV == CurInst) {
3616 // CurInst dominates this predecessor.
3617 NumWithout = 2;
3618 break;
3619 } else {
3620 PredMap.push_back(std::make_pair(PredV, P));
3621 ++NumWith;
3622 }
3623 }
3624
3625 // Don't do PRE when it might increase code size, i.e. when
3626 // we would need to insert instructions in more than one pred.
3627 if (NumWithout > 1 || NumWith == 0)
3628 return false;
3629
3630 // We may have a case where all predecessors have the instruction,
3631 // and we just need to insert a phi node. Otherwise, perform
3632 // insertion.
3633 Instruction *PREInstr = nullptr;
3634
3635 if (NumWithout != 0) {
3636 if (!isSafeToSpeculativelyExecute(CurInst)) {
3637 // It is only valid to insert a new instruction if the current instruction
3638 // is always executed. An instruction with implicit control flow could
3639 // prevent us from doing it. If we cannot speculate the execution, then
3640 // PRE should be prohibited.
3641 if (ICF->isDominatedByICFIFromSameBlock(CurInst))
3642 return false;
3643 }
3644
3645 // Don't do PRE across indirect branch.
3646 if (isa<IndirectBrInst>(PREPred->getTerminator()))
3647 return false;
3648
3649 // We can't do PRE safely on a critical edge, so instead we schedule
3650 // the edge to be split and perform the PRE the next time we iterate
3651 // on the function.
3652 unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
3653 if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
3654 ToSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
3655 return false;
3656 }
3657 // We need to insert somewhere, so let's give it a shot.
3658 PREInstr = CurInst->clone();
3659 if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
3660 // If we failed insertion, make sure we remove the instruction.
3661#ifndef NDEBUG
3662 verifyRemoved(PREInstr);
3663#endif
3664 PREInstr->deleteValue();
3665 return false;
3666 }
3667 }
3668
3669 // Either we should have filled in the PRE instruction, or we should
3670 // not have needed insertions.
3671 assert(PREInstr != nullptr || NumWithout == 0);
3672
3673 ++NumGVNPRE;
3674
3675 // Create a PHI to make the value available in this block.
3676 PHINode *Phi = PHINode::Create(CurInst->getType(), PredMap.size(),
3677 CurInst->getName() + ".pre-phi");
3678 Phi->insertBefore(CurrentBlock->begin());
3679 for (auto &[V, BB] : PredMap) {
3680 if (V) {
3681 // If we use an existing value in this phi, we have to patch the original
3682 // value because the phi will be used to replace a later value.
3683 patchReplacementInstruction(CurInst, V);
3684 Phi->addIncoming(V, BB);
3685 } else
3686 Phi->addIncoming(PREInstr, PREPred);
3687 }
3688
3689 VN.add(Phi, ValNo);
3690 // After creating a new PHI for ValNo, the phi translate result for ValNo will
3691 // be changed, so erase the related stale entries in phi translate cache.
3692 VN.eraseTranslateCacheEntry(ValNo, *CurrentBlock);
3693 LeaderTable.insert(ValNo, Phi, CurrentBlock);
3694 Phi->setDebugLoc(CurInst->getDebugLoc());
3695 CurInst->replaceAllUsesWith(Phi);
3696 if (MD && Phi->getType()->isPtrOrPtrVectorTy())
3697 MD->invalidateCachedPointerInfo(Phi);
3698 LeaderTable.erase(ValNo, CurInst, CurrentBlock);
3699
3700 LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
3701 removeInstruction(CurInst);
3702
3703 return true;
3704}
3705
3706/// Perform a purely local form of PRE that looks for diamond
3707/// control flow patterns and attempts to perform simple PRE at the join point.
3708bool GVNPass::performPRE(Function &F) {
3709 bool Changed = false;
3710 for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
3711 // Nothing to PRE in the entry block.
3712 if (CurrentBlock == &F.getEntryBlock())
3713 continue;
3714
3715 // Don't perform PRE on an EH pad.
3716 if (CurrentBlock->isEHPad())
3717 continue;
3718
3719 for (BasicBlock::iterator BI = CurrentBlock->begin(),
3720 BE = CurrentBlock->end();
3721 BI != BE;) {
3722 Instruction *CurInst = &*BI++;
3723 Changed |= performScalarPRE(CurInst);
3724 }
3725 }
3726
3727 if (splitCriticalEdges())
3728 Changed = true;
3729
3730 return Changed;
3731}
3732
3733/// Split the critical edge connecting the given two blocks, and return
3734/// the block inserted to the critical edge.
3735BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
3736 // GVN does not require loop-simplify, do not try to preserve it if it is not
3737 // possible.
3739 Pred, Succ,
3740 CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
3741 if (BB) {
3742 if (MD)
3743 MD->invalidateCachedPredecessors();
3744 InvalidBlockRPONumbers = true;
3745 }
3746 return BB;
3747}
3748
3749/// Split critical edges found during the previous
3750/// iteration that may enable further optimization.
3751bool GVNPass::splitCriticalEdges() {
3752 if (ToSplit.empty())
3753 return false;
3754
3755 bool Changed = false;
3756 do {
3757 std::pair<Instruction *, unsigned> Edge = ToSplit.pop_back_val();
3758 Changed |= SplitCriticalEdge(Edge.first, Edge.second,
3759 CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
3760 nullptr;
3761 } while (!ToSplit.empty());
3762 if (Changed) {
3763 if (MD)
3764 MD->invalidateCachedPredecessors();
3765 InvalidBlockRPONumbers = true;
3766 }
3767 return Changed;
3768}
3769
3770/// Executes one iteration of GVN.
3771bool GVNPass::iterateOnFunction(Function &F) {
3772 cleanupGlobalSets();
3773
3774 // Top-down walk of the dominator tree.
3775 bool Changed = false;
3776 // Needed for value numbering with phi construction to work.
3777 // RPOT walks the graph in its constructor and will not be invalidated during
3778 // processBlock.
3779 ReversePostOrderTraversal<Function *> RPOT(&F);
3780
3781 for (BasicBlock *BB : RPOT)
3782 Changed |= processBlock(BB);
3783
3784 return Changed;
3785}
3786
3787void GVNPass::cleanupGlobalSets() {
3788 VN.clear();
3789 LeaderTable.clear();
3790 BlockRPONumber.clear();
3791 ICF->clear();
3792 InvalidBlockRPONumbers = true;
3793}
3794
3795void GVNPass::removeInstruction(Instruction *I) {
3796 VN.erase(I);
3797 if (MD) MD->removeInstruction(I);
3798 if (MSSAU)
3799 MSSAU->removeMemoryAccess(I);
3800#ifndef NDEBUG
3801 verifyRemoved(I);
3802#endif
3803 ICF->removeInstruction(I);
3804 I->eraseFromParent();
3805 ++NumGVNInstr;
3806}
3807
3808/// Verify that the specified instruction does not occur in our
3809/// internal data structures.
3810void GVNPass::verifyRemoved(const Instruction *Inst) const {
3811 VN.verifyRemoved(Inst);
3812}
3813
3814/// BB is declared dead, which implied other blocks become dead as well. This
3815/// function is to add all these blocks to "DeadBlocks". For the dead blocks'
3816/// live successors, update their phi nodes by replacing the operands
3817/// corresponding to dead blocks with UndefVal.
3818void GVNPass::addDeadBlock(BasicBlock *BB) {
3820 SmallSetVector<BasicBlock *, 4> DF;
3821
3822 NewDead.push_back(BB);
3823 while (!NewDead.empty()) {
3824 BasicBlock *D = NewDead.pop_back_val();
3825 if (DeadBlocks.count(D))
3826 continue;
3827
3828 // All blocks dominated by D are dead.
3829 SmallVector<BasicBlock *, 8> Dom;
3830 DT->getDescendants(D, Dom);
3831 DeadBlocks.insert_range(Dom);
3832
3833 // Figure out the dominance-frontier(D).
3834 for (BasicBlock *B : Dom) {
3835 for (BasicBlock *S : successors(B)) {
3836 if (DeadBlocks.count(S))
3837 continue;
3838
3839 bool AllPredDead = true;
3840 for (BasicBlock *P : predecessors(S))
3841 if (!DeadBlocks.count(P)) {
3842 AllPredDead = false;
3843 break;
3844 }
3845
3846 if (!AllPredDead) {
3847 // S could be proved dead later on. That is why we don't update phi
3848 // operands at this moment.
3849 DF.insert(S);
3850 } else {
3851 // While S is not dominated by D, it is dead by now. This could take
3852 // place if S already have a dead predecessor before D is declared
3853 // dead.
3854 NewDead.push_back(S);
3855 }
3856 }
3857 }
3858 }
3859
3860 // For the dead blocks' live successors, update their phi nodes by replacing
3861 // the operands corresponding to dead blocks with UndefVal.
3862 for (BasicBlock *B : DF) {
3863 if (DeadBlocks.count(B))
3864 continue;
3865
3866 // First, split the critical edges. This might also create additional blocks
3867 // to preserve LoopSimplify form and adjust edges accordingly.
3869 for (BasicBlock *P : Preds) {
3870 if (!DeadBlocks.count(P))
3871 continue;
3872
3873 if (is_contained(successors(P), B) &&
3874 isCriticalEdge(P->getTerminator(), B)) {
3875 if (BasicBlock *S = splitCriticalEdges(P, B))
3876 DeadBlocks.insert(P = S);
3877 }
3878 }
3879
3880 // Now poison the incoming values from the dead predecessors.
3881 for (BasicBlock *P : predecessors(B)) {
3882 if (!DeadBlocks.count(P))
3883 continue;
3884 for (PHINode &Phi : B->phis()) {
3885 Phi.setIncomingValueForBlock(P, PoisonValue::get(Phi.getType()));
3886 if (MD)
3887 MD->invalidateCachedPointerInfo(&Phi);
3888 }
3889 }
3890 }
3891}
3892
3893// If the given branch is recognized as a foldable branch (i.e. conditional
3894// branch with constant condition), it will perform following analyses and
3895// transformation.
3896// 1) If the dead out-coming edge is a critical-edge, split it. Let
3897// R be the target of the dead out-coming edge.
3898// 1) Identify the set of dead blocks implied by the branch's dead outcoming
3899// edge. The result of this step will be {X| X is dominated by R}
3900// 2) Identify those blocks which haves at least one dead predecessor. The
3901// result of this step will be dominance-frontier(R).
3902// 3) Update the PHIs in DF(R) by replacing the operands corresponding to
3903// dead blocks with "UndefVal" in an hope these PHIs will optimized away.
3904//
3905// Return true iff *NEW* dead code are found.
3906bool GVNPass::processFoldableCondBr(CondBrInst *BI) {
3907 // If a branch has two identical successors, we cannot declare either dead.
3908 if (BI->getSuccessor(0) == BI->getSuccessor(1))
3909 return false;
3910
3911 ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
3912 if (!Cond)
3913 return false;
3914
3915 BasicBlock *DeadRoot =
3916 Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
3917 if (DeadBlocks.count(DeadRoot))
3918 return false;
3919
3920 if (!DeadRoot->getSinglePredecessor())
3921 DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);
3922
3923 addDeadBlock(DeadRoot);
3924 return true;
3925}
3926
3927// performPRE() will trigger assert if it comes across an instruction without
3928// associated val-num. As it normally has far more live instructions than dead
3929// instructions, it makes more sense just to "fabricate" a val-number for the
3930// dead code than checking if instruction involved is dead or not.
3931void GVNPass::assignValNumForDeadCode() {
3932 for (BasicBlock *BB : DeadBlocks) {
3933 for (Instruction &Inst : *BB) {
3934 unsigned ValNum = VN.lookupOrAdd(&Inst);
3935 LeaderTable.insert(ValNum, &Inst, BB);
3936 }
3937 }
3938}
3939
3941public:
3942 static char ID; // Pass identification, replacement for typeid.
3943
3944 explicit GVNLegacyPass(bool MemDepAnalysis = GVNEnableMemDep,
3945 bool MemSSAAnalysis = GVNEnableMemorySSA,
3946 bool ScalarPRE = true)
3947 : FunctionPass(ID), Impl(GVNOptions()
3948 .setMemDep(MemDepAnalysis)
3949 .setMemorySSA(MemSSAAnalysis)
3950 .setScalarPRE(ScalarPRE)) {
3952 }
3953
3954 bool runOnFunction(Function &F) override {
3955 if (skipFunction(F))
3956 return false;
3957
3959 if (Impl.isMemorySSAEnabled() && !MSSAWP)
3961
3962 return Impl.runImpl(
3963 F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
3966 getAnalysis<AAResultsWrapperPass>().getAAResults(),
3967 Impl.isMemDepEnabled()
3969 : nullptr,
3970 getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
3972 MSSAWP ? &MSSAWP->getMSSA() : nullptr);
3973 }
3974
3992
3993private:
3994 GVNPass Impl;
3995};
3996
3997char GVNLegacyPass::ID = 0;
3998
3999INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
4008INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
4009
4010// The public interface to this file...
4013 return new GVNLegacyPass(GVNEnableMemDep, GVNEnableMemorySSA, ScalarPRE);
4014}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyFunctionPass > DF("debugify-function", "Attach debug info to a function")
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
early cse Early CSE w MemorySSA
static void reportMayClobberedLoad(LoadInst *Load, Instruction *DepInst, const DominatorTree *DT, OptimizationRemarkEmitter *ORE)
Try to locate the three instruction involved in a missed load-elimination case that is due to an inte...
Definition GVN.cpp:1279
static Instruction * findInvariantGroupValue(LoadInst *L, DominatorTree &DT)
If a load has !invariant.group, try to find the most-dominating instruction with the same metadata an...
Definition GVN.cpp:2200
static void reportLoadElim(LoadInst *Load, Value *AvailableValue, OptimizationRemarkEmitter *ORE)
Definition GVN.cpp:2006
static cl::opt< uint32_t > MaxNumInsnsPerBlock("gvn-max-num-insns", cl::Hidden, cl::init(100), cl::desc("Max number of instructions to scan in each basic block in GVN " "(default = 100)"))
static cl::opt< bool > GVNEnableMemDep("enable-gvn-memdep", cl::init(true))
static cl::opt< bool > GVNEnableLoadInLoopPRE("enable-load-in-loop-pre", cl::init(true))
static const Instruction * findMayClobberedPtrAccess(LoadInst *Load, const DominatorTree *DT)
Definition GVN.cpp:1223
static cl::opt< uint32_t > MaxNumDeps("gvn-max-num-deps", cl::Hidden, cl::init(100), cl::desc("Max number of dependences to attempt Load PRE (default = 100)"))
static std::optional< MemoryLocation > maybeLoadStoreLocation(Instruction *I, bool AllowStores, const TargetLibraryInfo *TLI)
Return the memory location accessed by the (masked) load/store instruction I, if the instruction coul...
Definition GVN.cpp:2252
static cl::opt< bool > GVNEnableMemorySSA("enable-gvn-memoryssa", cl::init(false))
static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, DominatorTree *DT)
There is an edge from 'Src' to 'Dst'.
Definition GVN.cpp:3031
static bool IsValueFullyAvailableInBlock(BasicBlock *BB, DenseMap< BasicBlock *, AvailabilityState > &FullyAvailableBlocks)
Return true if we can prove that the value we're analyzing is fully available in the specified block.
Definition GVN.cpp:963
static cl::opt< bool > GVNEnableScalarPRE("enable-scalar-pre", cl::init(true), cl::Hidden)
static Value * findDominatingValue(const MemoryLocation &Loc, Type *LoadTy, Instruction *From, AAResults *AA)
Definition GVN.cpp:1300
static bool liesBetween(const Instruction *From, Instruction *Between, const Instruction *To, const DominatorTree *DT)
Assuming To can be reached from both From and Between, does Between lie on every path from From to To...
Definition GVN.cpp:1214
static bool isLifetimeStart(const Instruction *Inst)
Definition GVN.cpp:1206
static cl::opt< bool > GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre", cl::init(false))
static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl)
Definition GVN.cpp:2192
static void replaceValuesPerBlockEntry(SmallVectorImpl< AvailableValueInBlock > &ValuesPerBlock, Value *OldValue, Value *NewValue)
If the specified OldValue exists in ValuesPerBlock, replace its value with NewValue.
Definition GVN.cpp:1079
static cl::opt< unsigned > ScanUsersLimit("gvn-scan-users-limit", cl::Hidden, cl::init(100), cl::desc("The number of memory accesses to scan in a block in reaching " "memory values analysis (default = 100)"))
static Value * ConstructSSAForLoadSet(LoadInst *Load, SmallVectorImpl< AvailableValueInBlock > &ValuesPerBlock, GVNPass &GVN)
Given a set of loads specified by ValuesPerBlock, construct SSA form, allowing us to eliminate Load.
Definition GVN.cpp:1098
AvailabilityState
Definition GVN.cpp:943
@ Unavailable
We know the block is not fully available. This is a fixpoint.
Definition GVN.cpp:945
@ Available
We know the block is fully available. This is a fixpoint.
Definition GVN.cpp:947
@ SpeculativelyAvailable
We do not know whether the block is fully available or not, but we are currently speculating that it ...
Definition GVN.cpp:952
static cl::opt< uint32_t > MaxNumVisitedInsts("gvn-max-num-visited-insts", cl::Hidden, cl::init(100), cl::desc("Max number of visited instructions when trying to find " "dominating value of select dependency (default = 100)"))
static cl::opt< uint32_t > MaxBBSpeculations("gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::desc("Max number of blocks we're willing to speculate on (and recurse " "into) when deducing if a value is fully available or not in GVN " "(default = 600)"))
static cl::opt< bool > GVNEnableLoadPRE("enable-load-pre", cl::init(true))
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
static bool lookup(const GsymReader &GR, GsymDataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
This file implements a map that provides insertion order iteration.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
uint64_t IntrinsicInst * II
#define P(N)
ppc ctr loops PowerPC CTR Loops Verify
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
static DominatorTree getDomTree(Function &F)
std::pair< BasicBlock *, BasicBlock * > Edge
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
@ MayAlias
The two locations may or may not alias.
@ NoAlias
The two locations do not alias at all.
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
constexpr int32_t getOffset() const
constexpr bool hasOffset() const
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
iterator end() const
Definition ArrayRef.h:130
iterator begin() const
Definition ArrayRef.h:129
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition BasicBlock.h:704
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
iterator end()
Definition DenseMap.h:143
Analysis pass which computes a DominatorTree.
Definition Dominators.h:270
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:306
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Class representing an expression and its matching format.
unsigned getNumIndices() const
iterator_range< idx_iterator > indices() const
idx_iterator idx_begin() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
FunctionPass(char &pid)
Definition Pass.h:316
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition Pass.cpp:188
const BasicBlock & getEntryBlock() const
Definition Function.h:809
Represents calls to the gc.relocate intrinsic.
This class holds the mapping between values and value numbers.
Definition GVN.h:164
LLVM_ABI uint32_t lookupOrAdd(MemoryAccess *MA)
Definition GVN.cpp:645
The core GVN pass object.
Definition GVN.h:131
friend class gvn::GVNLegacyPass
Definition GVN.h:250
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition GVN.cpp:874
LLVM_ABI void salvageAndRemoveInstruction(Instruction *I)
This removes the specified instruction from our various maps and marks it for deletion.
Definition GVN.cpp:926
AAResults * getAliasAnalysis() const
Definition GVN.h:151
LLVM_ABI bool isLoadPREEnabled() const
Definition GVN.cpp:853
GVNPass(GVNOptions Options={})
Definition GVN.h:137
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition GVN.cpp:906
LLVM_ABI bool isMemorySSAEnabled() const
Definition GVN.cpp:870
DominatorTree & getDominatorTree() const
Definition GVN.h:150
LLVM_ABI bool isLoadInLoopPREEnabled() const
Definition GVN.cpp:857
LLVM_ABI bool isScalarPREEnabled() const
Definition GVN.cpp:849
LLVM_ABI bool isLoadPRESplitBackedgeEnabled() const
Definition GVN.cpp:861
LLVM_ABI bool isMemDepEnabled() const
Definition GVN.cpp:866
Legacy wrapper pass to provide the GlobalsAAResult object.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs={})
Drop all unknown metadata except for debug locations.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:587
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:612
iterator find(const KeyT &Key)
Definition MapVector.h:156
iterator end()
Definition MapVector.h:69
size_type size() const
Definition MapVector.h:58
A memory dependence query can return one of three different answers.
bool isClobber() const
Tests if this MemDepResult represents a query that is an instruction clobber dependency.
bool isNonLocal() const
Tests if this MemDepResult represents a query that is transparent to the start of the block,...
bool isDef() const
Tests if this MemDepResult represents a query that is an instruction definition dependency.
bool isLocal() const
Tests if this MemDepResult represents a valid local query (Clobber/Def).
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
This is the common base class for memset/memcpy/memmove.
BasicBlock * getBlock() const
Definition MemorySSA.h:162
An analysis that produces MemoryDependenceResults for a function.
std::vector< NonLocalDepEntry > NonLocalDepInfo
LLVM_ABI MemDepResult getDependency(Instruction *QueryInst)
Returns the instruction on which a memory operation depends.
LLVM_ABI const NonLocalDepInfo & getNonLocalCallDependency(CallBase *QueryCall)
Perform a full dependency query for the specified call, returning the set of blocks that the value is...
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
MemoryLocation getWithNewPtr(const Value *NewPtr) const
const Value * Ptr
The address of the start of the location.
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition MemorySSA.h:529
BasicBlock * getIncomingBlock(unsigned I) const
Return incoming basic block number i.
Definition MemorySSA.h:542
MemoryAccess * getIncomingValue(unsigned I) const
Return incoming value number x.
Definition MemorySSA.h:532
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:922
Legacy analysis pass which computes MemorySSA.
Definition MemorySSA.h:975
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
LLVM_ABI bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in the same basic block, determine whether MemoryAccess A dominates MemoryA...
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition MemorySSA.h:740
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition MemorySSA.h:260
This is an entry in the NonLocalDepInfo cache.
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
LLVM_ABI Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
LLVM_ABI bool isPotentiallyPHITranslatable() const
isPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
bool needsPHITranslationFromBlock(BasicBlock *BB) const
needsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessor...
Value * getAddr() const
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information tha...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition SSAUpdater.h:39
LLVM_ABI void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
LLVM_ABI Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
LLVM_ABI bool HasValueForBlock(BasicBlock *BB) const
Return true if the SSAUpdater already has a value for the specified block.
LLVM_ABI void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class represents the LLVM 'select' instruction.
const Value * getCondition() const
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
SmallVector & operator=(const SmallVector &RHS)
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI bool isTokenLikeTy() const
Returns true if this is 'token' or a token-like target type.s.
Definition Type.cpp:1140
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:552
iterator_range< user_iterator > users()
Definition Value.h:426
bool hasUseList() const
Check if this Value has a use-list.
Definition Value.h:344
LLVM_ABI bool canBeFreed() const
Return true if the memory object referred to by V can by freed in the scope for which the SSA value d...
Definition Value.cpp:827
LLVM_ABI void deleteValue()
Delete a pointer to a generic Value.
Definition Value.cpp:107
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
An efficient, type-erasing, non-owning reference to a callable.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition GVN.cpp:3975
GVNLegacyPass(bool MemDepAnalysis=GVNEnableMemDep, bool MemSSAAnalysis=GVNEnableMemorySSA, bool ScalarPRE=true)
Definition GVN.cpp:3944
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Definition GVN.cpp:3954
An opaque object representing a hash code.
Definition Hashing.h:77
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
LLVM_ABI int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, StoreInst *DepSI, const DataLayout &DL)
This function determines whether a value for the pointer LoadPtr can be extracted from the store at D...
LLVM_ABI Value * getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, Type *LoadTy, Instruction *InsertPt, const DataLayout &DL)
If analyzeLoadFromClobberingMemInst returned an offset, this function can be used to actually perform...
LLVM_ABI int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, const DataLayout &DL)
This function determines whether a value for the pointer LoadPtr can be extracted from the load at De...
LLVM_ABI Value * getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, Instruction *InsertPt, Function *F)
If analyzeLoadFromClobberingStore/Load returned an offset, this function can be used to actually perf...
LLVM_ABI int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, MemIntrinsic *DepMI, const DataLayout &DL)
This function determines whether a value for the pointer LoadPtr can be extracted from the memory int...
LLVM_ABI bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, Function *F)
Return true if CoerceAvailableValueToLoadType would succeed if it was called.
initializer< Ty > init(const Ty &Val)
A private "module" namespace for types and utilities used by GVN.
Definition GVN.h:66
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
hash_code hash_value(const FixedPointSemantics &Val)
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
LLVM_ABI unsigned replaceDominatedUsesWithIf(Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Edge, function_ref< bool(const Use &U, const Value *To)> ShouldReplace)
Replace each use of 'From' with 'To' if that use is dominated by the given edge and the callback Shou...
Definition Local.cpp:3277
LLVM_ABI unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition CFG.cpp:90
auto pred_end(const MachineBasicBlock *BB)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI FunctionPass * createGVNPass(bool ScalarPRE)
Create a legacy GVN pass.
Definition GVN.cpp:4012
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1687
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
LLVM_ABI bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
LLVM_ABI bool isAssumeWithEmptyBundle(const AssumeInst &Assume)
Return true iff the operand bundles of the provided llvm.assume doesn't contain any valuable informat...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
LLVM_ABI bool canReplacePointersInUseIfEqual(const Use &U, const Value *To, const DataLayout &DL)
Definition Loads.cpp:858
LLVM_ABI bool canReplacePointersIfEqual(const Value *From, const Value *To, const DataLayout &DL)
Returns true if a pointer value From can be replaced with another pointer value \To if they are deeme...
Definition Loads.cpp:878
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void patchReplacementInstruction(Instruction *I, Value *Repl)
Patch the replacement so that it is not more restrictive than the value being replaced.
Definition Local.cpp:3177
LLVM_ABI void initializeGVNLegacyPassPass(PassRegistry &)
LLVM_ABI unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Edge)
Replace each use of 'From' with 'To' if that use is dominated by the given edge.
Definition Local.cpp:3256
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3105
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition ModRef.h:28
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ NoModRef
The access neither references nor modifies the value stored in memory.
Definition ModRef.h:30
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI FunctionPass * createGVNPass()
Definition GVN.cpp:4011
LLVM_ABI bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr, const CycleInfo *CI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition CFG.cpp:335
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition CFG.cpp:106
constexpr unsigned BitWidth
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:107
iterator_range< df_iterator< T > > depth_first(const T &G)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:305
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:285
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
static bool isEqual(const GVNPass::Expression &LHS, const GVNPass::Expression &RHS)
Definition GVN.cpp:186
static unsigned getHashValue(const GVNPass::Expression &E)
Definition GVN.cpp:180
An information struct used to provide DenseMap with the various necessary components for a given valu...
A set of parameters to control various transforms performed by GVN pass.
Definition GVN.h:81
bool operator==(const Expression &Other) const
Definition GVN.cpp:158
friend hash_code hash_value(const Expression &Value)
Definition GVN.cpp:173
SmallVector< uint32_t, 4 > VarArgs
Definition GVN.cpp:152
AttributeList Attrs
Definition GVN.cpp:154
Expression(uint32_t Op=~2U)
Definition GVN.cpp:156
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:89
Represents an AvailableValue which can be rematerialized at the end of the associated BasicBlock.
Definition GVN.cpp:292
static AvailableValueInBlock get(BasicBlock *BB, Value *V, unsigned Offset=0)
Definition GVN.cpp:306
static AvailableValueInBlock getUndef(BasicBlock *BB)
Definition GVN.cpp:311
static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV)
Definition GVN.cpp:299
AvailableValue AV
AV - The actual available value.
Definition GVN.cpp:297
static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel, Value *V1, Value *V2)
Definition GVN.cpp:315
BasicBlock * BB
BB - The basic block in question.
Definition GVN.cpp:294
Value * MaterializeAdjustedValue(LoadInst *Load) const
Emit code at the end of this block to adjust the value defined here to the specified type.
Definition GVN.cpp:322
Represents a particular available value that we know how to materialize.
Definition GVN.cpp:196
unsigned Offset
Offset - The byte offset in Val that is interesting for the load query.
Definition GVN.cpp:213
bool isSimpleValue() const
Definition GVN.cpp:259
static AvailableValue getSelect(SelectInst *Sel, Value *V1, Value *V2)
Definition GVN.cpp:249
bool isCoercedLoadValue() const
Definition GVN.cpp:260
static AvailableValue get(Value *V, unsigned Offset=0)
Definition GVN.cpp:217
ValType Kind
Kind of the live-out value.
Definition GVN.cpp:210
LoadInst * getCoercedLoadValue() const
Definition GVN.cpp:270
static AvailableValue getLoad(LoadInst *Load, unsigned Offset=0)
Definition GVN.cpp:233
static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset=0)
Definition GVN.cpp:225
bool isUndefValue() const
Definition GVN.cpp:262
bool isSelectValue() const
Definition GVN.cpp:263
Value * Val
Val - The value that is live out of the block.
Definition GVN.cpp:208
Value * V1
V1, V2 - The dominating non-clobbered values of SelectVal.
Definition GVN.cpp:215
static AvailableValue getUndef()
Definition GVN.cpp:241
SelectInst * getSelectValue() const
Definition GVN.cpp:280
Value * getSimpleValue() const
Definition GVN.cpp:265
bool isMemIntrinValue() const
Definition GVN.cpp:261
MemIntrinsic * getMemIntrinValue() const
Definition GVN.cpp:275
Value * MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt) const
Emit code at the specified insertion point to adjust the value defined here to the specified type.
Definition GVN.cpp:1141