LLVM 20.0.0git
GVN.cpp
Go to the documentation of this file.
1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global value numbering to eliminate fully redundant
10// instructions. It also performs simple dead load elimination.
11//
12// Note that this pass does the value numbering itself; it does not use the
13// ValueNumbering analysis passes.
14//
15//===----------------------------------------------------------------------===//
16
18#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/Hashing.h"
21#include "llvm/ADT/MapVector.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/Statistic.h"
31#include "llvm/Analysis/CFG.h"
36#include "llvm/Analysis/Loads.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DebugLoc.h"
51#include "llvm/IR/Dominators.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/LLVMContext.h"
58#include "llvm/IR/Metadata.h"
59#include "llvm/IR/Module.h"
60#include "llvm/IR/PassManager.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/Value.h"
66#include "llvm/Pass.h"
70#include "llvm/Support/Debug.h"
77#include <algorithm>
78#include <cassert>
79#include <cstdint>
80#include <optional>
81#include <utility>
82
83using namespace llvm;
84using namespace llvm::gvn;
85using namespace llvm::VNCoercion;
86using namespace PatternMatch;
87
88#define DEBUG_TYPE "gvn"
89
90STATISTIC(NumGVNInstr, "Number of instructions deleted");
91STATISTIC(NumGVNLoad, "Number of loads deleted");
92STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
93STATISTIC(NumGVNBlocks, "Number of blocks merged");
94STATISTIC(NumGVNSimpl, "Number of instructions simplified");
95STATISTIC(NumGVNEqProp, "Number of equalities propagated");
96STATISTIC(NumPRELoad, "Number of loads PRE'd");
97STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
98STATISTIC(NumPRELoadMoved2CEPred,
99 "Number of loads moved to predecessor of a critical edge in PRE");
100
101STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
102 "Number of blocks speculated as available in "
103 "IsValueFullyAvailableInBlock(), max");
104STATISTIC(MaxBBSpeculationCutoffReachedTimes,
105 "Number of times we we reached gvn-max-block-speculations cut-off "
106 "preventing further exploration");
107
108static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden);
109static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true));
110static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
111 cl::init(true));
112static cl::opt<bool>
113GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
114 cl::init(false));
115static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
116
118 "gvn-max-num-deps", cl::Hidden, cl::init(100),
119 cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
120
121// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
123 "gvn-max-block-speculations", cl::Hidden, cl::init(600),
124 cl::desc("Max number of blocks we're willing to speculate on (and recurse "
125 "into) when deducing if a value is fully available or not in GVN "
126 "(default = 600)"));
127
129 "gvn-max-num-visited-insts", cl::Hidden, cl::init(100),
130 cl::desc("Max number of visited instructions when trying to find "
131 "dominating value of select dependency (default = 100)"));
132
134 "gvn-max-num-insns", cl::Hidden, cl::init(100),
135 cl::desc("Max number of instructions to scan in each basic block in GVN "
136 "(default = 100)"));
137
140 bool commutative = false;
141 // The type is not necessarily the result type of the expression, it may be
142 // any additional type needed to disambiguate the expression.
143 Type *type = nullptr;
145
146 Expression(uint32_t o = ~2U) : opcode(o) {}
147
148 bool operator==(const Expression &other) const {
149 if (opcode != other.opcode)
150 return false;
151 if (opcode == ~0U || opcode == ~1U)
152 return true;
153 if (type != other.type)
154 return false;
155 if (varargs != other.varargs)
156 return false;
157 return true;
158 }
159
161 return hash_combine(
162 Value.opcode, Value.type,
163 hash_combine_range(Value.varargs.begin(), Value.varargs.end()));
164 }
165};
166
167namespace llvm {
168
169template <> struct DenseMapInfo<GVNPass::Expression> {
170 static inline GVNPass::Expression getEmptyKey() { return ~0U; }
171 static inline GVNPass::Expression getTombstoneKey() { return ~1U; }
172
173 static unsigned getHashValue(const GVNPass::Expression &e) {
174 using llvm::hash_value;
175
176 return static_cast<unsigned>(hash_value(e));
177 }
178
179 static bool isEqual(const GVNPass::Expression &LHS,
180 const GVNPass::Expression &RHS) {
181 return LHS == RHS;
182 }
183};
184
185} // end namespace llvm
186
187/// Represents a particular available value that we know how to materialize.
188/// Materialization of an AvailableValue never fails. An AvailableValue is
189/// implicitly associated with a rematerialization point which is the
190/// location of the instruction from which it was formed.
192 enum class ValType {
193 SimpleVal, // A simple offsetted value that is accessed.
194 LoadVal, // A value produced by a load.
195 MemIntrin, // A memory intrinsic which is loaded from.
196 UndefVal, // A UndefValue representing a value from dead block (which
197 // is not yet physically removed from the CFG).
198 SelectVal, // A pointer select which is loaded from and for which the load
199 // can be replace by a value select.
200 };
201
202 /// Val - The value that is live out of the block.
204 /// Kind of the live-out value.
206
207 /// Offset - The byte offset in Val that is interesting for the load query.
208 unsigned Offset = 0;
209 /// V1, V2 - The dominating non-clobbered values of SelectVal.
210 Value *V1 = nullptr, *V2 = nullptr;
211
212 static AvailableValue get(Value *V, unsigned Offset = 0) {
213 AvailableValue Res;
214 Res.Val = V;
216 Res.Offset = Offset;
217 return Res;
218 }
219
220 static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
221 AvailableValue Res;
222 Res.Val = MI;
224 Res.Offset = Offset;
225 return Res;
226 }
227
228 static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) {
229 AvailableValue Res;
230 Res.Val = Load;
232 Res.Offset = Offset;
233 return Res;
234 }
235
237 AvailableValue Res;
238 Res.Val = nullptr;
240 Res.Offset = 0;
241 return Res;
242 }
243
245 AvailableValue Res;
246 Res.Val = Sel;
248 Res.Offset = 0;
249 Res.V1 = V1;
250 Res.V2 = V2;
251 return Res;
252 }
253
254 bool isSimpleValue() const { return Kind == ValType::SimpleVal; }
255 bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; }
256 bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; }
257 bool isUndefValue() const { return Kind == ValType::UndefVal; }
258 bool isSelectValue() const { return Kind == ValType::SelectVal; }
259
261 assert(isSimpleValue() && "Wrong accessor");
262 return Val;
263 }
264
266 assert(isCoercedLoadValue() && "Wrong accessor");
267 return cast<LoadInst>(Val);
268 }
269
271 assert(isMemIntrinValue() && "Wrong accessor");
272 return cast<MemIntrinsic>(Val);
273 }
274
276 assert(isSelectValue() && "Wrong accessor");
277 return cast<SelectInst>(Val);
278 }
279
280 /// Emit code at the specified insertion point to adjust the value defined
281 /// here to the specified type. This handles various coercion cases.
283 GVNPass &gvn) const;
284};
285
286/// Represents an AvailableValue which can be rematerialized at the end of
287/// the associated BasicBlock.
289 /// BB - The basic block in question.
290 BasicBlock *BB = nullptr;
291
292 /// AV - The actual available value
294
297 Res.BB = BB;
298 Res.AV = std::move(AV);
299 return Res;
300 }
301
303 unsigned Offset = 0) {
304 return get(BB, AvailableValue::get(V, Offset));
305 }
306
309 }
310
312 Value *V1, Value *V2) {
313 return get(BB, AvailableValue::getSelect(Sel, V1, V2));
314 }
315
316 /// Emit code at the end of this block to adjust the value defined here to
317 /// the specified type. This handles various coercion cases.
319 return AV.MaterializeAdjustedValue(Load, BB->getTerminator(), gvn);
320 }
321};
322
323//===----------------------------------------------------------------------===//
324// ValueTable Internal Functions
325//===----------------------------------------------------------------------===//
326
327GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) {
328 Expression e;
329 e.type = I->getType();
330 e.opcode = I->getOpcode();
331 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(I)) {
332 // gc.relocate is 'special' call: its second and third operands are
333 // not real values, but indices into statepoint's argument list.
334 // Use the refered to values for purposes of identity.
335 e.varargs.push_back(lookupOrAdd(GCR->getOperand(0)));
336 e.varargs.push_back(lookupOrAdd(GCR->getBasePtr()));
337 e.varargs.push_back(lookupOrAdd(GCR->getDerivedPtr()));
338 } else {
339 for (Use &Op : I->operands())
340 e.varargs.push_back(lookupOrAdd(Op));
341 }
342 if (I->isCommutative()) {
343 // Ensure that commutative instructions that only differ by a permutation
344 // of their operands get the same value number by sorting the operand value
345 // numbers. Since commutative operands are the 1st two operands it is more
346 // efficient to sort by hand rather than using, say, std::sort.
347 assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
348 if (e.varargs[0] > e.varargs[1])
349 std::swap(e.varargs[0], e.varargs[1]);
350 e.commutative = true;
351 }
352
353 if (auto *C = dyn_cast<CmpInst>(I)) {
354 // Sort the operand value numbers so x<y and y>x get the same value number.
355 CmpInst::Predicate Predicate = C->getPredicate();
356 if (e.varargs[0] > e.varargs[1]) {
357 std::swap(e.varargs[0], e.varargs[1]);
359 }
360 e.opcode = (C->getOpcode() << 8) | Predicate;
361 e.commutative = true;
362 } else if (auto *E = dyn_cast<InsertValueInst>(I)) {
363 e.varargs.append(E->idx_begin(), E->idx_end());
364 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
365 ArrayRef<int> ShuffleMask = SVI->getShuffleMask();
366 e.varargs.append(ShuffleMask.begin(), ShuffleMask.end());
367 }
368
369 return e;
370}
371
372GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
373 unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
374 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
375 "Not a comparison!");
378 e.varargs.push_back(lookupOrAdd(LHS));
379 e.varargs.push_back(lookupOrAdd(RHS));
380
381 // Sort the operand value numbers so x<y and y>x get the same value number.
382 if (e.varargs[0] > e.varargs[1]) {
383 std::swap(e.varargs[0], e.varargs[1]);
385 }
386 e.opcode = (Opcode << 8) | Predicate;
387 e.commutative = true;
388 return e;
389}
390
392GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
393 assert(EI && "Not an ExtractValueInst?");
395 e.type = EI->getType();
396 e.opcode = 0;
397
398 WithOverflowInst *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand());
399 if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
400 // EI is an extract from one of our with.overflow intrinsics. Synthesize
401 // a semantically equivalent expression instead of an extract value
402 // expression.
403 e.opcode = WO->getBinaryOp();
404 e.varargs.push_back(lookupOrAdd(WO->getLHS()));
405 e.varargs.push_back(lookupOrAdd(WO->getRHS()));
406 return e;
407 }
408
409 // Not a recognised intrinsic. Fall back to producing an extract value
410 // expression.
411 e.opcode = EI->getOpcode();
412 for (Use &Op : EI->operands())
413 e.varargs.push_back(lookupOrAdd(Op));
414
415 append_range(e.varargs, EI->indices());
416
417 return e;
418}
419
420GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
421 Expression E;
422 Type *PtrTy = GEP->getType()->getScalarType();
423 const DataLayout &DL = GEP->getDataLayout();
424 unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy);
425 MapVector<Value *, APInt> VariableOffsets;
426 APInt ConstantOffset(BitWidth, 0);
427 if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
428 // Convert into offset representation, to recognize equivalent address
429 // calculations that use different type encoding.
430 LLVMContext &Context = GEP->getContext();
431 E.opcode = GEP->getOpcode();
432 E.type = nullptr;
433 E.varargs.push_back(lookupOrAdd(GEP->getPointerOperand()));
434 for (const auto &Pair : VariableOffsets) {
435 E.varargs.push_back(lookupOrAdd(Pair.first));
436 E.varargs.push_back(lookupOrAdd(ConstantInt::get(Context, Pair.second)));
437 }
438 if (!ConstantOffset.isZero())
439 E.varargs.push_back(
440 lookupOrAdd(ConstantInt::get(Context, ConstantOffset)));
441 } else {
442 // If converting to offset representation fails (for scalable vectors),
443 // fall back to type-based implementation:
444 E.opcode = GEP->getOpcode();
445 E.type = GEP->getSourceElementType();
446 for (Use &Op : GEP->operands())
447 E.varargs.push_back(lookupOrAdd(Op));
448 }
449 return E;
450}
451
452//===----------------------------------------------------------------------===//
453// ValueTable External Functions
454//===----------------------------------------------------------------------===//
455
456GVNPass::ValueTable::ValueTable() = default;
457GVNPass::ValueTable::ValueTable(const ValueTable &) = default;
458GVNPass::ValueTable::ValueTable(ValueTable &&) = default;
459GVNPass::ValueTable::~ValueTable() = default;
462
463/// add - Insert a value into the table with a specified value number.
464void GVNPass::ValueTable::add(Value *V, uint32_t num) {
465 valueNumbering.insert(std::make_pair(V, num));
466 if (PHINode *PN = dyn_cast<PHINode>(V))
467 NumberingPhi[num] = PN;
468}
469
470uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
471 // FIXME: Currently the calls which may access the thread id may
472 // be considered as not accessing the memory. But this is
473 // problematic for coroutines, since coroutines may resume in a
474 // different thread. So we disable the optimization here for the
475 // correctness. However, it may block many other correct
476 // optimizations. Revert this one when we detect the memory
477 // accessing kind more precisely.
478 if (C->getFunction()->isPresplitCoroutine()) {
479 valueNumbering[C] = nextValueNumber;
480 return nextValueNumber++;
481 }
482
483 // Do not combine convergent calls since they implicitly depend on the set of
484 // threads that is currently executing, and they might be in different basic
485 // blocks.
486 if (C->isConvergent()) {
487 valueNumbering[C] = nextValueNumber;
488 return nextValueNumber++;
489 }
490
491 if (AA->doesNotAccessMemory(C)) {
492 Expression exp = createExpr(C);
493 uint32_t e = assignExpNewValueNum(exp).first;
494 valueNumbering[C] = e;
495 return e;
496 }
497
498 if (MD && AA->onlyReadsMemory(C)) {
499 Expression exp = createExpr(C);
500 auto ValNum = assignExpNewValueNum(exp);
501 if (ValNum.second) {
502 valueNumbering[C] = ValNum.first;
503 return ValNum.first;
504 }
505
506 MemDepResult local_dep = MD->getDependency(C);
507
508 if (!local_dep.isDef() && !local_dep.isNonLocal()) {
509 valueNumbering[C] = nextValueNumber;
510 return nextValueNumber++;
511 }
512
513 if (local_dep.isDef()) {
514 // For masked load/store intrinsics, the local_dep may actually be
515 // a normal load or store instruction.
516 CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst());
517
518 if (!local_cdep || local_cdep->arg_size() != C->arg_size()) {
519 valueNumbering[C] = nextValueNumber;
520 return nextValueNumber++;
521 }
522
523 for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
524 uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
525 uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i));
526 if (c_vn != cd_vn) {
527 valueNumbering[C] = nextValueNumber;
528 return nextValueNumber++;
529 }
530 }
531
532 uint32_t v = lookupOrAdd(local_cdep);
533 valueNumbering[C] = v;
534 return v;
535 }
536
537 // Non-local case.
540 // FIXME: Move the checking logic to MemDep!
541 CallInst* cdep = nullptr;
542
543 // Check to see if we have a single dominating call instruction that is
544 // identical to C.
545 for (const NonLocalDepEntry &I : deps) {
546 if (I.getResult().isNonLocal())
547 continue;
548
549 // We don't handle non-definitions. If we already have a call, reject
550 // instruction dependencies.
551 if (!I.getResult().isDef() || cdep != nullptr) {
552 cdep = nullptr;
553 break;
554 }
555
556 CallInst *NonLocalDepCall = dyn_cast<CallInst>(I.getResult().getInst());
557 // FIXME: All duplicated with non-local case.
558 if (NonLocalDepCall && DT->properlyDominates(I.getBB(), C->getParent())) {
559 cdep = NonLocalDepCall;
560 continue;
561 }
562
563 cdep = nullptr;
564 break;
565 }
566
567 if (!cdep) {
568 valueNumbering[C] = nextValueNumber;
569 return nextValueNumber++;
570 }
571
572 if (cdep->arg_size() != C->arg_size()) {
573 valueNumbering[C] = nextValueNumber;
574 return nextValueNumber++;
575 }
576 for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
577 uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
578 uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i));
579 if (c_vn != cd_vn) {
580 valueNumbering[C] = nextValueNumber;
581 return nextValueNumber++;
582 }
583 }
584
585 uint32_t v = lookupOrAdd(cdep);
586 valueNumbering[C] = v;
587 return v;
588 }
589
590 valueNumbering[C] = nextValueNumber;
591 return nextValueNumber++;
592}
593
594/// Returns true if a value number exists for the specified value.
595bool GVNPass::ValueTable::exists(Value *V) const {
596 return valueNumbering.contains(V);
597}
598
599/// lookup_or_add - Returns the value number for the specified value, assigning
600/// it a new number if it did not have one before.
601uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
602 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
603 if (VI != valueNumbering.end())
604 return VI->second;
605
606 auto *I = dyn_cast<Instruction>(V);
607 if (!I) {
608 valueNumbering[V] = nextValueNumber;
609 return nextValueNumber++;
610 }
611
612 Expression exp;
613 switch (I->getOpcode()) {
614 case Instruction::Call:
615 return lookupOrAddCall(cast<CallInst>(I));
616 case Instruction::FNeg:
617 case Instruction::Add:
618 case Instruction::FAdd:
619 case Instruction::Sub:
620 case Instruction::FSub:
621 case Instruction::Mul:
622 case Instruction::FMul:
623 case Instruction::UDiv:
624 case Instruction::SDiv:
625 case Instruction::FDiv:
626 case Instruction::URem:
627 case Instruction::SRem:
628 case Instruction::FRem:
629 case Instruction::Shl:
630 case Instruction::LShr:
631 case Instruction::AShr:
632 case Instruction::And:
633 case Instruction::Or:
634 case Instruction::Xor:
635 case Instruction::ICmp:
636 case Instruction::FCmp:
637 case Instruction::Trunc:
638 case Instruction::ZExt:
639 case Instruction::SExt:
640 case Instruction::FPToUI:
641 case Instruction::FPToSI:
642 case Instruction::UIToFP:
643 case Instruction::SIToFP:
644 case Instruction::FPTrunc:
645 case Instruction::FPExt:
646 case Instruction::PtrToInt:
647 case Instruction::IntToPtr:
648 case Instruction::AddrSpaceCast:
649 case Instruction::BitCast:
650 case Instruction::Select:
651 case Instruction::Freeze:
652 case Instruction::ExtractElement:
653 case Instruction::InsertElement:
654 case Instruction::ShuffleVector:
655 case Instruction::InsertValue:
656 exp = createExpr(I);
657 break;
658 case Instruction::GetElementPtr:
659 exp = createGEPExpr(cast<GetElementPtrInst>(I));
660 break;
661 case Instruction::ExtractValue:
662 exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
663 break;
664 case Instruction::PHI:
665 valueNumbering[V] = nextValueNumber;
666 NumberingPhi[nextValueNumber] = cast<PHINode>(V);
667 return nextValueNumber++;
668 default:
669 valueNumbering[V] = nextValueNumber;
670 return nextValueNumber++;
671 }
672
673 uint32_t e = assignExpNewValueNum(exp).first;
674 valueNumbering[V] = e;
675 return e;
676}
677
678/// Returns the value number of the specified value. Fails if
679/// the value has not yet been numbered.
680uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const {
681 DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
682 if (Verify) {
683 assert(VI != valueNumbering.end() && "Value not numbered?");
684 return VI->second;
685 }
686 return (VI != valueNumbering.end()) ? VI->second : 0;
687}
688
689/// Returns the value number of the given comparison,
690/// assigning it a new number if it did not have one before. Useful when
691/// we deduced the result of a comparison, but don't immediately have an
692/// instruction realizing that comparison to hand.
693uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode,
694 CmpInst::Predicate Predicate,
695 Value *LHS, Value *RHS) {
696 Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
697 return assignExpNewValueNum(exp).first;
698}
699
700/// Remove all entries from the ValueTable.
702 valueNumbering.clear();
703 expressionNumbering.clear();
704 NumberingPhi.clear();
705 PhiTranslateTable.clear();
706 nextValueNumber = 1;
707 Expressions.clear();
708 ExprIdx.clear();
709 nextExprNumber = 0;
710}
711
712/// Remove a value from the value numbering.
714 uint32_t Num = valueNumbering.lookup(V);
715 valueNumbering.erase(V);
716 // If V is PHINode, V <--> value number is an one-to-one mapping.
717 if (isa<PHINode>(V))
718 NumberingPhi.erase(Num);
719}
720
721/// verifyRemoved - Verify that the value is removed from all internal data
722/// structures.
723void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
724 assert(!valueNumbering.contains(V) &&
725 "Inst still occurs in value numbering map!");
726}
727
728//===----------------------------------------------------------------------===//
729// LeaderMap External Functions
730//===----------------------------------------------------------------------===//
731
732/// Push a new Value to the LeaderTable onto the list for its value number.
733void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) {
734 LeaderListNode &Curr = NumToLeaders[N];
735 if (!Curr.Entry.Val) {
736 Curr.Entry.Val = V;
737 Curr.Entry.BB = BB;
738 return;
739 }
740
741 LeaderListNode *Node = TableAllocator.Allocate<LeaderListNode>();
742 Node->Entry.Val = V;
743 Node->Entry.BB = BB;
744 Node->Next = Curr.Next;
745 Curr.Next = Node;
746}
747
748/// Scan the list of values corresponding to a given
749/// value number, and remove the given instruction if encountered.
750void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I,
751 const BasicBlock *BB) {
752 LeaderListNode *Prev = nullptr;
753 LeaderListNode *Curr = &NumToLeaders[N];
754
755 while (Curr && (Curr->Entry.Val != I || Curr->Entry.BB != BB)) {
756 Prev = Curr;
757 Curr = Curr->Next;
758 }
759
760 if (!Curr)
761 return;
762
763 if (Prev) {
764 Prev->Next = Curr->Next;
765 } else {
766 if (!Curr->Next) {
767 Curr->Entry.Val = nullptr;
768 Curr->Entry.BB = nullptr;
769 } else {
770 LeaderListNode *Next = Curr->Next;
771 Curr->Entry.Val = Next->Entry.Val;
772 Curr->Entry.BB = Next->Entry.BB;
773 Curr->Next = Next->Next;
774 }
775 }
776}
777
778void GVNPass::LeaderMap::verifyRemoved(const Value *V) const {
779 // Walk through the value number scope to make sure the instruction isn't
780 // ferreted away in it.
781 for (const auto &I : NumToLeaders) {
782 (void)I;
783 assert(I.second.Entry.Val != V && "Inst still in value numbering scope!");
784 assert(
785 std::none_of(leader_iterator(&I.second), leader_iterator(nullptr),
786 [=](const LeaderTableEntry &E) { return E.Val == V; }) &&
787 "Inst still in value numbering scope!");
788 }
789}
790
791//===----------------------------------------------------------------------===//
792// GVN Pass
793//===----------------------------------------------------------------------===//
794
796 return Options.AllowPRE.value_or(GVNEnablePRE);
797}
798
800 return Options.AllowLoadPRE.value_or(GVNEnableLoadPRE);
801}
802
804 return Options.AllowLoadInLoopPRE.value_or(GVNEnableLoadInLoopPRE);
805}
806
808 return Options.AllowLoadPRESplitBackedge.value_or(
810}
811
813 return Options.AllowMemDep.value_or(GVNEnableMemDep);
814}
815
817 // FIXME: The order of evaluation of these 'getResult' calls is very
818 // significant! Re-ordering these variables will cause GVN when run alone to
819 // be less effective! We should fix memdep and basic-aa to not exhibit this
820 // behavior, but until then don't change the order here.
821 auto &AC = AM.getResult<AssumptionAnalysis>(F);
822 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
823 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
824 auto &AA = AM.getResult<AAManager>(F);
825 auto *MemDep =
827 auto &LI = AM.getResult<LoopAnalysis>(F);
828 auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
830 bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
831 MSSA ? &MSSA->getMSSA() : nullptr);
832 if (!Changed)
833 return PreservedAnalyses::all();
837 if (MSSA)
840 return PA;
841}
842
844 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
845 static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
846 OS, MapClassName2PassName);
847
848 OS << '<';
849 if (Options.AllowPRE != std::nullopt)
850 OS << (*Options.AllowPRE ? "" : "no-") << "pre;";
851 if (Options.AllowLoadPRE != std::nullopt)
852 OS << (*Options.AllowLoadPRE ? "" : "no-") << "load-pre;";
853 if (Options.AllowLoadPRESplitBackedge != std::nullopt)
854 OS << (*Options.AllowLoadPRESplitBackedge ? "" : "no-")
855 << "split-backedge-load-pre;";
856 if (Options.AllowMemDep != std::nullopt)
857 OS << (*Options.AllowMemDep ? "" : "no-") << "memdep";
858 OS << '>';
859}
860
861#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
862LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &d) const {
863 errs() << "{\n";
864 for (auto &I : d) {
865 errs() << I.first << "\n";
866 I.second->dump();
867 }
868 errs() << "}\n";
869}
870#endif
871
872enum class AvailabilityState : char {
873 /// We know the block *is not* fully available. This is a fixpoint.
874 Unavailable = 0,
875 /// We know the block *is* fully available. This is a fixpoint.
876 Available = 1,
877 /// We do not know whether the block is fully available or not,
878 /// but we are currently speculating that it will be.
879 /// If it would have turned out that the block was, in fact, not fully
880 /// available, this would have been cleaned up into an Unavailable.
882};
883
884/// Return true if we can prove that the value
885/// we're analyzing is fully available in the specified block. As we go, keep
886/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
887/// map is actually a tri-state map with the following values:
888/// 0) we know the block *is not* fully available.
889/// 1) we know the block *is* fully available.
890/// 2) we do not know whether the block is fully available or not, but we are
891/// currently speculating that it will be.
893 BasicBlock *BB,
894 DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
896 std::optional<BasicBlock *> UnavailableBB;
897
898 // The number of times we didn't find an entry for a block in a map and
899 // optimistically inserted an entry marking block as speculatively available.
900 unsigned NumNewNewSpeculativelyAvailableBBs = 0;
901
902#ifndef NDEBUG
903 SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
905#endif
906
907 Worklist.emplace_back(BB);
908 while (!Worklist.empty()) {
909 BasicBlock *CurrBB = Worklist.pop_back_val(); // LoadFO - depth-first!
910 // Optimistically assume that the block is Speculatively Available and check
911 // to see if we already know about this block in one lookup.
912 std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
913 FullyAvailableBlocks.try_emplace(
914 CurrBB, AvailabilityState::SpeculativelyAvailable);
915 AvailabilityState &State = IV.first->second;
916
917 // Did the entry already exist for this block?
918 if (!IV.second) {
919 if (State == AvailabilityState::Unavailable) {
920 UnavailableBB = CurrBB;
921 break; // Backpropagate unavailability info.
922 }
923
924#ifndef NDEBUG
925 AvailableBBs.emplace_back(CurrBB);
926#endif
927 continue; // Don't recurse further, but continue processing worklist.
928 }
929
930 // No entry found for block.
931 ++NumNewNewSpeculativelyAvailableBBs;
932 bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;
933
934 // If we have exhausted our budget, mark this block as unavailable.
935 // Also, if this block has no predecessors, the value isn't live-in here.
936 if (OutOfBudget || pred_empty(CurrBB)) {
937 MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
938 State = AvailabilityState::Unavailable;
939 UnavailableBB = CurrBB;
940 break; // Backpropagate unavailability info.
941 }
942
943 // Tentatively consider this block as speculatively available.
944#ifndef NDEBUG
945 NewSpeculativelyAvailableBBs.insert(CurrBB);
946#endif
947 // And further recurse into block's predecessors, in depth-first order!
948 Worklist.append(pred_begin(CurrBB), pred_end(CurrBB));
949 }
950
951#if LLVM_ENABLE_STATS
952 IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
953 NumNewNewSpeculativelyAvailableBBs);
954#endif
955
956 // If the block isn't marked as fixpoint yet
957 // (the Unavailable and Available states are fixpoints)
958 auto MarkAsFixpointAndEnqueueSuccessors =
959 [&](BasicBlock *BB, AvailabilityState FixpointState) {
960 auto It = FullyAvailableBlocks.find(BB);
961 if (It == FullyAvailableBlocks.end())
962 return; // Never queried this block, leave as-is.
963 switch (AvailabilityState &State = It->second) {
964 case AvailabilityState::Unavailable:
965 case AvailabilityState::Available:
966 return; // Don't backpropagate further, continue processing worklist.
967 case AvailabilityState::SpeculativelyAvailable: // Fix it!
968 State = FixpointState;
969#ifndef NDEBUG
970 assert(NewSpeculativelyAvailableBBs.erase(BB) &&
971 "Found a speculatively available successor leftover?");
972#endif
973 // Queue successors for further processing.
974 Worklist.append(succ_begin(BB), succ_end(BB));
975 return;
976 }
977 };
978
979 if (UnavailableBB) {
980 // Okay, we have encountered an unavailable block.
981 // Mark speculatively available blocks reachable from UnavailableBB as
982 // unavailable as well. Paths are terminated when they reach blocks not in
983 // FullyAvailableBlocks or they are not marked as speculatively available.
984 Worklist.clear();
985 Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB));
986 while (!Worklist.empty())
987 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
988 AvailabilityState::Unavailable);
989 }
990
991#ifndef NDEBUG
992 Worklist.clear();
993 for (BasicBlock *AvailableBB : AvailableBBs)
994 Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
995 while (!Worklist.empty())
996 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
997 AvailabilityState::Available);
998
999 assert(NewSpeculativelyAvailableBBs.empty() &&
1000 "Must have fixed all the new speculatively available blocks.");
1001#endif
1002
1003 return !UnavailableBB;
1004}
1005
1006/// If the specified OldValue exists in ValuesPerBlock, replace its value with
1007/// NewValue.
1009 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue,
1010 Value *NewValue) {
1011 for (AvailableValueInBlock &V : ValuesPerBlock) {
1012 if (V.AV.Val == OldValue)
1013 V.AV.Val = NewValue;
1014 if (V.AV.isSelectValue()) {
1015 if (V.AV.V1 == OldValue)
1016 V.AV.V1 = NewValue;
1017 if (V.AV.V2 == OldValue)
1018 V.AV.V2 = NewValue;
1019 }
1020 }
1021}
1022
1023/// Given a set of loads specified by ValuesPerBlock,
1024/// construct SSA form, allowing us to eliminate Load. This returns the value
1025/// that should be used at Load's definition site.
1026static Value *
1029 GVNPass &gvn) {
1030 // Check for the fully redundant, dominating load case. In this case, we can
1031 // just use the dominating value directly.
1032 if (ValuesPerBlock.size() == 1 &&
1033 gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
1034 Load->getParent())) {
1035 assert(!ValuesPerBlock[0].AV.isUndefValue() &&
1036 "Dead BB dominate this block");
1037 return ValuesPerBlock[0].MaterializeAdjustedValue(Load, gvn);
1038 }
1039
1040 // Otherwise, we have to construct SSA form.
1042 SSAUpdater SSAUpdate(&NewPHIs);
1043 SSAUpdate.Initialize(Load->getType(), Load->getName());
1044
1045 for (const AvailableValueInBlock &AV : ValuesPerBlock) {
1046 BasicBlock *BB = AV.BB;
1047
1048 if (AV.AV.isUndefValue())
1049 continue;
1050
1051 if (SSAUpdate.HasValueForBlock(BB))
1052 continue;
1053
1054 // If the value is the load that we will be eliminating, and the block it's
1055 // available in is the block that the load is in, then don't add it as
1056 // SSAUpdater will resolve the value to the relevant phi which may let it
1057 // avoid phi construction entirely if there's actually only one value.
1058 if (BB == Load->getParent() &&
1059 ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == Load) ||
1060 (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == Load)))
1061 continue;
1062
1063 SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(Load, gvn));
1064 }
1065
1066 // Perform PHI construction.
1067 return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent());
1068}
1069
1071 Instruction *InsertPt,
1072 GVNPass &gvn) const {
1073 Value *Res;
1074 Type *LoadTy = Load->getType();
1075 const DataLayout &DL = Load->getDataLayout();
1076 if (isSimpleValue()) {
1077 Res = getSimpleValue();
1078 if (Res->getType() != LoadTy) {
1079 Res = getValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
1080
1081 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
1082 << " " << *getSimpleValue() << '\n'
1083 << *Res << '\n'
1084 << "\n\n\n");
1085 }
1086 } else if (isCoercedLoadValue()) {
1087 LoadInst *CoercedLoad = getCoercedLoadValue();
1088 if (CoercedLoad->getType() == LoadTy && Offset == 0) {
1089 Res = CoercedLoad;
1090 combineMetadataForCSE(CoercedLoad, Load, false);
1091 } else {
1092 Res = getValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt, DL);
1093 // We are adding a new user for this load, for which the original
1094 // metadata may not hold. Additionally, the new load may have a different
1095 // size and type, so their metadata cannot be combined in any
1096 // straightforward way.
1097 // Drop all metadata that is not known to cause immediate UB on violation,
1098 // unless the load has !noundef, in which case all metadata violations
1099 // will be promoted to UB.
1100 // TODO: We can combine noalias/alias.scope metadata here, because it is
1101 // independent of the load type.
1102 if (!CoercedLoad->hasMetadata(LLVMContext::MD_noundef))
1103 CoercedLoad->dropUnknownNonDebugMetadata(
1104 {LLVMContext::MD_dereferenceable,
1105 LLVMContext::MD_dereferenceable_or_null,
1106 LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group});
1107 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
1108 << " " << *getCoercedLoadValue() << '\n'
1109 << *Res << '\n'
1110 << "\n\n\n");
1111 }
1112 } else if (isMemIntrinValue()) {
1113 Res = getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
1114 InsertPt, DL);
1115 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1116 << " " << *getMemIntrinValue() << '\n'
1117 << *Res << '\n'
1118 << "\n\n\n");
1119 } else if (isSelectValue()) {
1120 // Introduce a new value select for a load from an eligible pointer select.
1121 SelectInst *Sel = getSelectValue();
1122 assert(V1 && V2 && "both value operands of the select must be present");
1123 Res =
1124 SelectInst::Create(Sel->getCondition(), V1, V2, "", Sel->getIterator());
1125 } else {
1126 llvm_unreachable("Should not materialize value from dead block");
1127 }
1128 assert(Res && "failed to materialize?");
1129 return Res;
1130}
1131
1132static bool isLifetimeStart(const Instruction *Inst) {
1133 if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
1134 return II->getIntrinsicID() == Intrinsic::lifetime_start;
1135 return false;
1136}
1137
1138/// Assuming To can be reached from both From and Between, does Between lie on
1139/// every path from From to To?
1140static bool liesBetween(const Instruction *From, Instruction *Between,
1141 const Instruction *To, DominatorTree *DT) {
1142 if (From->getParent() == Between->getParent())
1143 return DT->dominates(From, Between);
1144 SmallSet<BasicBlock *, 1> Exclusion;
1145 Exclusion.insert(Between->getParent());
1146 return !isPotentiallyReachable(From, To, &Exclusion, DT);
1147}
1148
1149/// Try to locate the three instruction involved in a missed
1150/// load-elimination case that is due to an intervening store.
1152 DominatorTree *DT,
1154 using namespace ore;
1155
1156 Instruction *OtherAccess = nullptr;
1157
1158 OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", Load);
1159 R << "load of type " << NV("Type", Load->getType()) << " not eliminated"
1160 << setExtraArgs();
1161
1162 for (auto *U : Load->getPointerOperand()->users()) {
1163 if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) {
1164 auto *I = cast<Instruction>(U);
1165 if (I->getFunction() == Load->getFunction() && DT->dominates(I, Load)) {
1166 // Use the most immediately dominating value
1167 if (OtherAccess) {
1168 if (DT->dominates(OtherAccess, I))
1169 OtherAccess = I;
1170 else
1171 assert(U == OtherAccess || DT->dominates(I, OtherAccess));
1172 } else
1173 OtherAccess = I;
1174 }
1175 }
1176 }
1177
1178 if (!OtherAccess) {
1179 // There is no dominating use, check if we can find a closest non-dominating
1180 // use that lies between any other potentially available use and Load.
1181 for (auto *U : Load->getPointerOperand()->users()) {
1182 if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) {
1183 auto *I = cast<Instruction>(U);
1184 if (I->getFunction() == Load->getFunction() &&
1185 isPotentiallyReachable(I, Load, nullptr, DT)) {
1186 if (OtherAccess) {
1187 if (liesBetween(OtherAccess, I, Load, DT)) {
1188 OtherAccess = I;
1189 } else if (!liesBetween(I, OtherAccess, Load, DT)) {
1190 // These uses are both partially available at Load were it not for
1191 // the clobber, but neither lies strictly after the other.
1192 OtherAccess = nullptr;
1193 break;
1194 } // else: keep current OtherAccess since it lies between U and Load
1195 } else {
1196 OtherAccess = I;
1197 }
1198 }
1199 }
1200 }
1201 }
1202
1203 if (OtherAccess)
1204 R << " in favor of " << NV("OtherAccess", OtherAccess);
1205
1206 R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst());
1207
1208 ORE->emit(R);
1209}
1210
1211// Find non-clobbered value for Loc memory location in extended basic block
1212// (chain of basic blocks with single predecessors) starting From instruction.
1213static Value *findDominatingValue(const MemoryLocation &Loc, Type *LoadTy,
1214 Instruction *From, AAResults *AA) {
1215 uint32_t NumVisitedInsts = 0;
1216 BasicBlock *FromBB = From->getParent();
1217 BatchAAResults BatchAA(*AA);
1218 for (BasicBlock *BB = FromBB; BB; BB = BB->getSinglePredecessor())
1219 for (auto *Inst = BB == FromBB ? From : BB->getTerminator();
1220 Inst != nullptr; Inst = Inst->getPrevNonDebugInstruction()) {
1221 // Stop the search if limit is reached.
1222 if (++NumVisitedInsts > MaxNumVisitedInsts)
1223 return nullptr;
1224 if (isModSet(BatchAA.getModRefInfo(Inst, Loc)))
1225 return nullptr;
1226 if (auto *LI = dyn_cast<LoadInst>(Inst))
1227 if (LI->getPointerOperand() == Loc.Ptr && LI->getType() == LoadTy)
1228 return LI;
1229 }
1230 return nullptr;
1231}
1232
1233std::optional<AvailableValue>
1234GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
1235 Value *Address) {
1236 assert(Load->isUnordered() && "rules below are incorrect for ordered access");
1237 assert(DepInfo.isLocal() && "expected a local dependence");
1238
1239 Instruction *DepInst = DepInfo.getInst();
1240
1241 const DataLayout &DL = Load->getDataLayout();
1242 if (DepInfo.isClobber()) {
1243 // If the dependence is to a store that writes to a superset of the bits
1244 // read by the load, we can extract the bits we need for the load from the
1245 // stored value.
1246 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
1247 // Can't forward from non-atomic to atomic without violating memory model.
1248 if (Address && Load->isAtomic() <= DepSI->isAtomic()) {
1249 int Offset =
1250 analyzeLoadFromClobberingStore(Load->getType(), Address, DepSI, DL);
1251 if (Offset != -1)
1252 return AvailableValue::get(DepSI->getValueOperand(), Offset);
1253 }
1254 }
1255
1256 // Check to see if we have something like this:
1257 // load i32* P
1258 // load i8* (P+1)
1259 // if we have this, replace the later with an extraction from the former.
1260 if (LoadInst *DepLoad = dyn_cast<LoadInst>(DepInst)) {
1261 // If this is a clobber and L is the first instruction in its block, then
1262 // we have the first instruction in the entry block.
1263 // Can't forward from non-atomic to atomic without violating memory model.
1264 if (DepLoad != Load && Address &&
1265 Load->isAtomic() <= DepLoad->isAtomic()) {
1266 Type *LoadType = Load->getType();
1267 int Offset = -1;
1268
1269 // If MD reported clobber, check it was nested.
1270 if (DepInfo.isClobber() &&
1271 canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) {
1272 const auto ClobberOff = MD->getClobberOffset(DepLoad);
1273 // GVN has no deal with a negative offset.
1274 Offset = (ClobberOff == std::nullopt || *ClobberOff < 0)
1275 ? -1
1276 : *ClobberOff;
1277 }
1278 if (Offset == -1)
1279 Offset =
1280 analyzeLoadFromClobberingLoad(LoadType, Address, DepLoad, DL);
1281 if (Offset != -1)
1282 return AvailableValue::getLoad(DepLoad, Offset);
1283 }
1284 }
1285
1286 // If the clobbering value is a memset/memcpy/memmove, see if we can
1287 // forward a value on from it.
1288 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
1289 if (Address && !Load->isAtomic()) {
1291 DepMI, DL);
1292 if (Offset != -1)
1293 return AvailableValue::getMI(DepMI, Offset);
1294 }
1295 }
1296
1297 // Nothing known about this clobber, have to be conservative
1298 LLVM_DEBUG(
1299 // fast print dep, using operator<< on instruction is too slow.
1300 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1301 dbgs() << " is clobbered by " << *DepInst << '\n';);
1303 reportMayClobberedLoad(Load, DepInfo, DT, ORE);
1304
1305 return std::nullopt;
1306 }
1307 assert(DepInfo.isDef() && "follows from above");
1308
1309 // Loading the alloca -> undef.
1310 // Loading immediately after lifetime begin -> undef.
1311 if (isa<AllocaInst>(DepInst) || isLifetimeStart(DepInst))
1312 return AvailableValue::get(UndefValue::get(Load->getType()));
1313
1314 if (Constant *InitVal =
1315 getInitialValueOfAllocation(DepInst, TLI, Load->getType()))
1316 return AvailableValue::get(InitVal);
1317
1318 if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
1319 // Reject loads and stores that are to the same address but are of
1320 // different types if we have to. If the stored value is convertable to
1321 // the loaded value, we can reuse it.
1322 if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), Load->getType(),
1323 DL))
1324 return std::nullopt;
1325
1326 // Can't forward from non-atomic to atomic without violating memory model.
1327 if (S->isAtomic() < Load->isAtomic())
1328 return std::nullopt;
1329
1330 return AvailableValue::get(S->getValueOperand());
1331 }
1332
1333 if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
1334 // If the types mismatch and we can't handle it, reject reuse of the load.
1335 // If the stored value is larger or equal to the loaded value, we can reuse
1336 // it.
1337 if (!canCoerceMustAliasedValueToLoad(LD, Load->getType(), DL))
1338 return std::nullopt;
1339
1340 // Can't forward from non-atomic to atomic without violating memory model.
1341 if (LD->isAtomic() < Load->isAtomic())
1342 return std::nullopt;
1343
1344 return AvailableValue::getLoad(LD);
1345 }
1346
1347 // Check if load with Addr dependent from select can be converted to select
1348 // between load values. There must be no instructions between the found
1349 // loads and DepInst that may clobber the loads.
1350 if (auto *Sel = dyn_cast<SelectInst>(DepInst)) {
1351 assert(Sel->getType() == Load->getPointerOperandType());
1352 auto Loc = MemoryLocation::get(Load);
1353 Value *V1 =
1354 findDominatingValue(Loc.getWithNewPtr(Sel->getTrueValue()),
1355 Load->getType(), DepInst, getAliasAnalysis());
1356 if (!V1)
1357 return std::nullopt;
1358 Value *V2 =
1359 findDominatingValue(Loc.getWithNewPtr(Sel->getFalseValue()),
1360 Load->getType(), DepInst, getAliasAnalysis());
1361 if (!V2)
1362 return std::nullopt;
1363 return AvailableValue::getSelect(Sel, V1, V2);
1364 }
1365
1366 // Unknown def - must be conservative
1367 LLVM_DEBUG(
1368 // fast print dep, using operator<< on instruction is too slow.
1369 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1370 dbgs() << " has unknown def " << *DepInst << '\n';);
1371 return std::nullopt;
1372}
1373
1374void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
1375 AvailValInBlkVect &ValuesPerBlock,
1376 UnavailBlkVect &UnavailableBlocks) {
1377 // Filter out useless results (non-locals, etc). Keep track of the blocks
1378 // where we have a value available in repl, also keep track of whether we see
1379 // dependencies that produce an unknown value for the load (such as a call
1380 // that could potentially clobber the load).
1381 for (const auto &Dep : Deps) {
1382 BasicBlock *DepBB = Dep.getBB();
1383 MemDepResult DepInfo = Dep.getResult();
1384
1385 if (DeadBlocks.count(DepBB)) {
1386 // Dead dependent mem-op disguise as a load evaluating the same value
1387 // as the load in question.
1388 ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
1389 continue;
1390 }
1391
1392 if (!DepInfo.isLocal()) {
1393 UnavailableBlocks.push_back(DepBB);
1394 continue;
1395 }
1396
1397 // The address being loaded in this non-local block may not be the same as
1398 // the pointer operand of the load if PHI translation occurs. Make sure
1399 // to consider the right address.
1400 if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Dep.getAddress())) {
1401 // subtlety: because we know this was a non-local dependency, we know
1402 // it's safe to materialize anywhere between the instruction within
1403 // DepInfo and the end of it's block.
1404 ValuesPerBlock.push_back(
1405 AvailableValueInBlock::get(DepBB, std::move(*AV)));
1406 } else {
1407 UnavailableBlocks.push_back(DepBB);
1408 }
1409 }
1410
1411 assert(Deps.size() == ValuesPerBlock.size() + UnavailableBlocks.size() &&
1412 "post condition violation");
1413}
1414
1415/// Given the following code, v1 is partially available on some edges, but not
1416/// available on the edge from PredBB. This function tries to find if there is
1417/// another identical load in the other successor of PredBB.
1418///
1419/// v0 = load %addr
1420/// br %LoadBB
1421///
1422/// LoadBB:
1423/// v1 = load %addr
1424/// ...
1425///
1426/// PredBB:
1427/// ...
1428/// br %cond, label %LoadBB, label %SuccBB
1429///
1430/// SuccBB:
1431/// v2 = load %addr
1432/// ...
1433///
1434LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
1435 LoadInst *Load) {
1436 // For simplicity we handle a Pred has 2 successors only.
1437 auto *Term = Pred->getTerminator();
1438 if (Term->getNumSuccessors() != 2 || Term->isSpecialTerminator())
1439 return nullptr;
1440 auto *SuccBB = Term->getSuccessor(0);
1441 if (SuccBB == LoadBB)
1442 SuccBB = Term->getSuccessor(1);
1443 if (!SuccBB->getSinglePredecessor())
1444 return nullptr;
1445
1446 unsigned int NumInsts = MaxNumInsnsPerBlock;
1447 for (Instruction &Inst : *SuccBB) {
1448 if (Inst.isDebugOrPseudoInst())
1449 continue;
1450 if (--NumInsts == 0)
1451 return nullptr;
1452
1453 if (!Inst.isIdenticalTo(Load))
1454 continue;
1455
1456 MemDepResult Dep = MD->getDependency(&Inst);
1457 // If an identical load doesn't depends on any local instructions, it can
1458 // be safely moved to PredBB.
1459 // Also check for the implicit control flow instructions. See the comments
1460 // in PerformLoadPRE for details.
1461 if (Dep.isNonLocal() && !ICF->isDominatedByICFIFromSameBlock(&Inst))
1462 return cast<LoadInst>(&Inst);
1463
1464 // Otherwise there is something in the same BB clobbers the memory, we can't
1465 // move this and later load to PredBB.
1466 return nullptr;
1467 }
1468
1469 return nullptr;
1470}
1471
1472void GVNPass::eliminatePartiallyRedundantLoad(
1473 LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1474 MapVector<BasicBlock *, Value *> &AvailableLoads,
1475 MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) {
1476 for (const auto &AvailableLoad : AvailableLoads) {
1477 BasicBlock *UnavailableBlock = AvailableLoad.first;
1478 Value *LoadPtr = AvailableLoad.second;
1479
1480 auto *NewLoad = new LoadInst(
1481 Load->getType(), LoadPtr, Load->getName() + ".pre", Load->isVolatile(),
1482 Load->getAlign(), Load->getOrdering(), Load->getSyncScopeID(),
1483 UnavailableBlock->getTerminator()->getIterator());
1484 NewLoad->setDebugLoc(Load->getDebugLoc());
1485 if (MSSAU) {
1486 auto *NewAccess = MSSAU->createMemoryAccessInBB(
1487 NewLoad, nullptr, NewLoad->getParent(), MemorySSA::BeforeTerminator);
1488 if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
1489 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
1490 else
1491 MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);
1492 }
1493
1494 // Transfer the old load's AA tags to the new load.
1495 AAMDNodes Tags = Load->getAAMetadata();
1496 if (Tags)
1497 NewLoad->setAAMetadata(Tags);
1498
1499 if (auto *MD = Load->getMetadata(LLVMContext::MD_invariant_load))
1500 NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
1501 if (auto *InvGroupMD = Load->getMetadata(LLVMContext::MD_invariant_group))
1502 NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
1503 if (auto *RangeMD = Load->getMetadata(LLVMContext::MD_range))
1504 NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
1505 if (auto *AccessMD = Load->getMetadata(LLVMContext::MD_access_group))
1506 if (LI->getLoopFor(Load->getParent()) == LI->getLoopFor(UnavailableBlock))
1507 NewLoad->setMetadata(LLVMContext::MD_access_group, AccessMD);
1508
1509 // We do not propagate the old load's debug location, because the new
1510 // load now lives in a different BB, and we want to avoid a jumpy line
1511 // table.
1512 // FIXME: How do we retain source locations without causing poor debugging
1513 // behavior?
1514
1515 // Add the newly created load.
1516 ValuesPerBlock.push_back(
1517 AvailableValueInBlock::get(UnavailableBlock, NewLoad));
1518 MD->invalidateCachedPointerInfo(LoadPtr);
1519 LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1520
1521 // For PredBB in CriticalEdgePredAndLoad we need to replace the uses of old
1522 // load instruction with the new created load instruction.
1523 if (CriticalEdgePredAndLoad) {
1524 auto I = CriticalEdgePredAndLoad->find(UnavailableBlock);
1525 if (I != CriticalEdgePredAndLoad->end()) {
1526 ++NumPRELoadMoved2CEPred;
1527 ICF->insertInstructionTo(NewLoad, UnavailableBlock);
1528 LoadInst *OldLoad = I->second;
1529 combineMetadataForCSE(NewLoad, OldLoad, false);
1530 OldLoad->replaceAllUsesWith(NewLoad);
1531 replaceValuesPerBlockEntry(ValuesPerBlock, OldLoad, NewLoad);
1532 if (uint32_t ValNo = VN.lookup(OldLoad, false))
1533 LeaderTable.erase(ValNo, OldLoad, OldLoad->getParent());
1534 VN.erase(OldLoad);
1535 removeInstruction(OldLoad);
1536 }
1537 }
1538 }
1539
1540 // Perform PHI construction.
1541 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
1542 // ConstructSSAForLoadSet is responsible for combining metadata.
1543 ICF->removeUsersOf(Load);
1544 Load->replaceAllUsesWith(V);
1545 if (isa<PHINode>(V))
1546 V->takeName(Load);
1547 if (Instruction *I = dyn_cast<Instruction>(V))
1548 I->setDebugLoc(Load->getDebugLoc());
1549 if (V->getType()->isPtrOrPtrVectorTy())
1552 ORE->emit([&]() {
1553 return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load)
1554 << "load eliminated by PRE";
1555 });
1556}
1557
1558bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1559 UnavailBlkVect &UnavailableBlocks) {
1560 // Okay, we have *some* definitions of the value. This means that the value
1561 // is available in some of our (transitive) predecessors. Lets think about
1562 // doing PRE of this load. This will involve inserting a new load into the
1563 // predecessor when it's not available. We could do this in general, but
1564 // prefer to not increase code size. As such, we only do this when we know
1565 // that we only have to insert *one* load (which means we're basically moving
1566 // the load, not inserting a new one).
1567
1568 SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(),
1569 UnavailableBlocks.end());
1570
1571 // Let's find the first basic block with more than one predecessor. Walk
1572 // backwards through predecessors if needed.
1573 BasicBlock *LoadBB = Load->getParent();
1574 BasicBlock *TmpBB = LoadBB;
1575
1576 // Check that there is no implicit control flow instructions above our load in
1577 // its block. If there is an instruction that doesn't always pass the
1578 // execution to the following instruction, then moving through it may become
1579 // invalid. For example:
1580 //
1581 // int arr[LEN];
1582 // int index = ???;
1583 // ...
1584 // guard(0 <= index && index < LEN);
1585 // use(arr[index]);
1586 //
1587 // It is illegal to move the array access to any point above the guard,
1588 // because if the index is out of bounds we should deoptimize rather than
1589 // access the array.
1590 // Check that there is no guard in this block above our instruction.
1591 bool MustEnsureSafetyOfSpeculativeExecution =
1593
1594 while (TmpBB->getSinglePredecessor()) {
1595 TmpBB = TmpBB->getSinglePredecessor();
1596 if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1597 return false;
1598 if (Blockers.count(TmpBB))
1599 return false;
1600
1601 // If any of these blocks has more than one successor (i.e. if the edge we
1602 // just traversed was critical), then there are other paths through this
1603 // block along which the load may not be anticipated. Hoisting the load
1604 // above this block would be adding the load to execution paths along
1605 // which it was not previously executed.
1606 if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1607 return false;
1608
1609 // Check that there is no implicit control flow in a block above.
1610 MustEnsureSafetyOfSpeculativeExecution =
1611 MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
1612 }
1613
1614 assert(TmpBB);
1615 LoadBB = TmpBB;
1616
1617 // Check to see how many predecessors have the loaded value fully
1618 // available.
1620 DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
1621 for (const AvailableValueInBlock &AV : ValuesPerBlock)
1622 FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
1623 for (BasicBlock *UnavailableBB : UnavailableBlocks)
1624 FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
1625
1626 // The edge from Pred to LoadBB is a critical edge will be splitted.
1627 SmallVector<BasicBlock *, 4> CriticalEdgePredSplit;
1628 // The edge from Pred to LoadBB is a critical edge, another successor of Pred
1629 // contains a load can be moved to Pred. This data structure maps the Pred to
1630 // the movable load.
1631 MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad;
1632 for (BasicBlock *Pred : predecessors(LoadBB)) {
1633 // If any predecessor block is an EH pad that does not allow non-PHI
1634 // instructions before the terminator, we can't PRE the load.
1635 if (Pred->getTerminator()->isEHPad()) {
1636 LLVM_DEBUG(
1637 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1638 << Pred->getName() << "': " << *Load << '\n');
1639 return false;
1640 }
1641
1642 if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
1643 continue;
1644 }
1645
1646 if (Pred->getTerminator()->getNumSuccessors() != 1) {
1647 if (isa<IndirectBrInst>(Pred->getTerminator())) {
1648 LLVM_DEBUG(
1649 dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1650 << Pred->getName() << "': " << *Load << '\n');
1651 return false;
1652 }
1653
1654 if (LoadBB->isEHPad()) {
1655 LLVM_DEBUG(
1656 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1657 << Pred->getName() << "': " << *Load << '\n');
1658 return false;
1659 }
1660
1661 // Do not split backedge as it will break the canonical loop form.
1663 if (DT->dominates(LoadBB, Pred)) {
1664 LLVM_DEBUG(
1665 dbgs()
1666 << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
1667 << Pred->getName() << "': " << *Load << '\n');
1668 return false;
1669 }
1670
1671 if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load))
1672 CriticalEdgePredAndLoad[Pred] = LI;
1673 else
1674 CriticalEdgePredSplit.push_back(Pred);
1675 } else {
1676 // Only add the predecessors that will not be split for now.
1677 PredLoads[Pred] = nullptr;
1678 }
1679 }
1680
1681 // Decide whether PRE is profitable for this load.
1682 unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size();
1683 unsigned NumUnavailablePreds = NumInsertPreds +
1684 CriticalEdgePredAndLoad.size();
1685 assert(NumUnavailablePreds != 0 &&
1686 "Fully available value should already be eliminated!");
1687 (void)NumUnavailablePreds;
1688
1689 // If we need to insert new load in multiple predecessors, reject it.
1690 // FIXME: If we could restructure the CFG, we could make a common pred with
1691 // all the preds that don't have an available Load and insert a new load into
1692 // that one block.
1693 if (NumInsertPreds > 1)
1694 return false;
1695
1696 // Now we know where we will insert load. We must ensure that it is safe
1697 // to speculatively execute the load at that points.
1698 if (MustEnsureSafetyOfSpeculativeExecution) {
1699 if (CriticalEdgePredSplit.size())
1700 if (!isSafeToSpeculativelyExecute(Load, LoadBB->getFirstNonPHI(), AC, DT))
1701 return false;
1702 for (auto &PL : PredLoads)
1703 if (!isSafeToSpeculativelyExecute(Load, PL.first->getTerminator(), AC,
1704 DT))
1705 return false;
1706 for (auto &CEP : CriticalEdgePredAndLoad)
1707 if (!isSafeToSpeculativelyExecute(Load, CEP.first->getTerminator(), AC,
1708 DT))
1709 return false;
1710 }
1711
1712 // Split critical edges, and update the unavailable predecessors accordingly.
1713 for (BasicBlock *OrigPred : CriticalEdgePredSplit) {
1714 BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
1715 assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
1716 PredLoads[NewPred] = nullptr;
1717 LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
1718 << LoadBB->getName() << '\n');
1719 }
1720
1721 for (auto &CEP : CriticalEdgePredAndLoad)
1722 PredLoads[CEP.first] = nullptr;
1723
1724 // Check if the load can safely be moved to all the unavailable predecessors.
1725 bool CanDoPRE = true;
1726 const DataLayout &DL = Load->getDataLayout();
1728 for (auto &PredLoad : PredLoads) {
1729 BasicBlock *UnavailablePred = PredLoad.first;
1730
1731 // Do PHI translation to get its value in the predecessor if necessary. The
1732 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1733 // We do the translation for each edge we skipped by going from Load's block
1734 // to LoadBB, otherwise we might miss pieces needing translation.
1735
1736 // If all preds have a single successor, then we know it is safe to insert
1737 // the load on the pred (?!?), so we can insert code to materialize the
1738 // pointer if it is not available.
1739 Value *LoadPtr = Load->getPointerOperand();
1740 BasicBlock *Cur = Load->getParent();
1741 while (Cur != LoadBB) {
1742 PHITransAddr Address(LoadPtr, DL, AC);
1743 LoadPtr = Address.translateWithInsertion(Cur, Cur->getSinglePredecessor(),
1744 *DT, NewInsts);
1745 if (!LoadPtr) {
1746 CanDoPRE = false;
1747 break;
1748 }
1749 Cur = Cur->getSinglePredecessor();
1750 }
1751
1752 if (LoadPtr) {
1753 PHITransAddr Address(LoadPtr, DL, AC);
1754 LoadPtr = Address.translateWithInsertion(LoadBB, UnavailablePred, *DT,
1755 NewInsts);
1756 }
1757 // If we couldn't find or insert a computation of this phi translated value,
1758 // we fail PRE.
1759 if (!LoadPtr) {
1760 LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1761 << *Load->getPointerOperand() << "\n");
1762 CanDoPRE = false;
1763 break;
1764 }
1765
1766 PredLoad.second = LoadPtr;
1767 }
1768
1769 if (!CanDoPRE) {
1770 while (!NewInsts.empty()) {
1771 // Erase instructions generated by the failed PHI translation before
1772 // trying to number them. PHI translation might insert instructions
1773 // in basic blocks other than the current one, and we delete them
1774 // directly, as markInstructionForDeletion only allows removing from the
1775 // current basic block.
1776 NewInsts.pop_back_val()->eraseFromParent();
1777 }
1778 // HINT: Don't revert the edge-splitting as following transformation may
1779 // also need to split these critical edges.
1780 return !CriticalEdgePredSplit.empty();
1781 }
1782
1783 // Okay, we can eliminate this load by inserting a reload in the predecessor
1784 // and using PHI construction to get the value in the other predecessors, do
1785 // it.
1786 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load << '\n');
1787 LLVM_DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size()
1788 << " INSTS: " << *NewInsts.back()
1789 << '\n');
1790
1791 // Assign value numbers to the new instructions.
1792 for (Instruction *I : NewInsts) {
1793 // Instructions that have been inserted in predecessor(s) to materialize
1794 // the load address do not retain their original debug locations. Doing
1795 // so could lead to confusing (but correct) source attributions.
1796 I->updateLocationAfterHoist();
1797
1798 // FIXME: We really _ought_ to insert these value numbers into their
1799 // parent's availability map. However, in doing so, we risk getting into
1800 // ordering issues. If a block hasn't been processed yet, we would be
1801 // marking a value as AVAIL-IN, which isn't what we intend.
1802 VN.lookupOrAdd(I);
1803 }
1804
1805 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, PredLoads,
1806 &CriticalEdgePredAndLoad);
1807 ++NumPRELoad;
1808 return true;
1809}
1810
1811bool GVNPass::performLoopLoadPRE(LoadInst *Load,
1812 AvailValInBlkVect &ValuesPerBlock,
1813 UnavailBlkVect &UnavailableBlocks) {
1814 const Loop *L = LI->getLoopFor(Load->getParent());
1815 // TODO: Generalize to other loop blocks that dominate the latch.
1816 if (!L || L->getHeader() != Load->getParent())
1817 return false;
1818
1819 BasicBlock *Preheader = L->getLoopPreheader();
1820 BasicBlock *Latch = L->getLoopLatch();
1821 if (!Preheader || !Latch)
1822 return false;
1823
1824 Value *LoadPtr = Load->getPointerOperand();
1825 // Must be available in preheader.
1826 if (!L->isLoopInvariant(LoadPtr))
1827 return false;
1828
1829 // We plan to hoist the load to preheader without introducing a new fault.
1830 // In order to do it, we need to prove that we cannot side-exit the loop
1831 // once loop header is first entered before execution of the load.
1832 if (ICF->isDominatedByICFIFromSameBlock(Load))
1833 return false;
1834
1835 BasicBlock *LoopBlock = nullptr;
1836 for (auto *Blocker : UnavailableBlocks) {
1837 // Blockers from outside the loop are handled in preheader.
1838 if (!L->contains(Blocker))
1839 continue;
1840
1841 // Only allow one loop block. Loop header is not less frequently executed
1842 // than each loop block, and likely it is much more frequently executed. But
1843 // in case of multiple loop blocks, we need extra information (such as block
1844 // frequency info) to understand whether it is profitable to PRE into
1845 // multiple loop blocks.
1846 if (LoopBlock)
1847 return false;
1848
1849 // Do not sink into inner loops. This may be non-profitable.
1850 if (L != LI->getLoopFor(Blocker))
1851 return false;
1852
1853 // Blocks that dominate the latch execute on every single iteration, maybe
1854 // except the last one. So PREing into these blocks doesn't make much sense
1855 // in most cases. But the blocks that do not necessarily execute on each
1856 // iteration are sometimes much colder than the header, and this is when
1857 // PRE is potentially profitable.
1858 if (DT->dominates(Blocker, Latch))
1859 return false;
1860
1861 // Make sure that the terminator itself doesn't clobber.
1862 if (Blocker->getTerminator()->mayWriteToMemory())
1863 return false;
1864
1865 LoopBlock = Blocker;
1866 }
1867
1868 if (!LoopBlock)
1869 return false;
1870
1871 // Make sure the memory at this pointer cannot be freed, therefore we can
1872 // safely reload from it after clobber.
1873 if (LoadPtr->canBeFreed())
1874 return false;
1875
1876 // TODO: Support critical edge splitting if blocker has more than 1 successor.
1877 MapVector<BasicBlock *, Value *> AvailableLoads;
1878 AvailableLoads[LoopBlock] = LoadPtr;
1879 AvailableLoads[Preheader] = LoadPtr;
1880
1881 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
1882 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads,
1883 /*CriticalEdgePredAndLoad*/ nullptr);
1884 ++NumPRELoopLoad;
1885 return true;
1886}
1887
1890 using namespace ore;
1891
1892 ORE->emit([&]() {
1893 return OptimizationRemark(DEBUG_TYPE, "LoadElim", Load)
1894 << "load of type " << NV("Type", Load->getType()) << " eliminated"
1895 << setExtraArgs() << " in favor of "
1896 << NV("InfavorOfValue", AvailableValue);
1897 });
1898}
1899
1900/// Attempt to eliminate a load whose dependencies are
1901/// non-local by performing PHI construction.
1902bool GVNPass::processNonLocalLoad(LoadInst *Load) {
1903 // non-local speculations are not allowed under asan.
1904 if (Load->getParent()->getParent()->hasFnAttribute(
1905 Attribute::SanitizeAddress) ||
1906 Load->getParent()->getParent()->hasFnAttribute(
1907 Attribute::SanitizeHWAddress))
1908 return false;
1909
1910 // Step 1: Find the non-local dependencies of the load.
1911 LoadDepVect Deps;
1912 MD->getNonLocalPointerDependency(Load, Deps);
1913
1914 // If we had to process more than one hundred blocks to find the
1915 // dependencies, this load isn't worth worrying about. Optimizing
1916 // it will be too expensive.
1917 unsigned NumDeps = Deps.size();
1918 if (NumDeps > MaxNumDeps)
1919 return false;
1920
1921 // If we had a phi translation failure, we'll have a single entry which is a
1922 // clobber in the current block. Reject this early.
1923 if (NumDeps == 1 &&
1924 !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
1925 LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs());
1926 dbgs() << " has unknown dependencies\n";);
1927 return false;
1928 }
1929
1930 bool Changed = false;
1931 // If this load follows a GEP, see if we can PRE the indices before analyzing.
1932 if (GetElementPtrInst *GEP =
1933 dyn_cast<GetElementPtrInst>(Load->getOperand(0))) {
1934 for (Use &U : GEP->indices())
1935 if (Instruction *I = dyn_cast<Instruction>(U.get()))
1936 Changed |= performScalarPRE(I);
1937 }
1938
1939 // Step 2: Analyze the availability of the load
1940 AvailValInBlkVect ValuesPerBlock;
1941 UnavailBlkVect UnavailableBlocks;
1942 AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks);
1943
1944 // If we have no predecessors that produce a known value for this load, exit
1945 // early.
1946 if (ValuesPerBlock.empty())
1947 return Changed;
1948
1949 // Step 3: Eliminate fully redundancy.
1950 //
1951 // If all of the instructions we depend on produce a known value for this
1952 // load, then it is fully redundant and we can use PHI insertion to compute
1953 // its value. Insert PHIs and remove the fully redundant value now.
1954 if (UnavailableBlocks.empty()) {
1955 LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load << '\n');
1956
1957 // Perform PHI construction.
1958 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
1959 // ConstructSSAForLoadSet is responsible for combining metadata.
1960 ICF->removeUsersOf(Load);
1961 Load->replaceAllUsesWith(V);
1962
1963 if (isa<PHINode>(V))
1964 V->takeName(Load);
1965 if (Instruction *I = dyn_cast<Instruction>(V))
1966 // If instruction I has debug info, then we should not update it.
1967 // Also, if I has a null DebugLoc, then it is still potentially incorrect
1968 // to propagate Load's DebugLoc because Load may not post-dominate I.
1969 if (Load->getDebugLoc() && Load->getParent() == I->getParent())
1970 I->setDebugLoc(Load->getDebugLoc());
1971 if (V->getType()->isPtrOrPtrVectorTy())
1974 ++NumGVNLoad;
1975 reportLoadElim(Load, V, ORE);
1976 return true;
1977 }
1978
1979 // Step 4: Eliminate partial redundancy.
1980 if (!isPREEnabled() || !isLoadPREEnabled())
1981 return Changed;
1982 if (!isLoadInLoopPREEnabled() && LI->getLoopFor(Load->getParent()))
1983 return Changed;
1984
1985 if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
1986 PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks))
1987 return true;
1988
1989 return Changed;
1990}
1991
1993 if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_EQ)
1994 return true;
1995
1996 // Floating point comparisons can be equal, but not equivalent. Cases:
1997 // NaNs for unordered operators
1998 // +0.0 vs 0.0 for all operators
1999 if (Cmp->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
2000 (Cmp->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
2001 Cmp->getFastMathFlags().noNaNs())) {
2002 Value *LHS = Cmp->getOperand(0);
2003 Value *RHS = Cmp->getOperand(1);
2004 // If we can prove either side non-zero, then equality must imply
2005 // equivalence.
2006 // FIXME: We should do this optimization if 'no signed zeros' is
2007 // applicable via an instruction-level fast-math-flag or some other
2008 // indicator that relaxed FP semantics are being used.
2009 if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
2010 return true;
2011 if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
2012 return true;
2013 // TODO: Handle vector floating point constants
2014 }
2015 return false;
2016}
2017
2019 if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_NE)
2020 return true;
2021
2022 // Floating point comparisons can be equal, but not equivelent. Cases:
2023 // NaNs for unordered operators
2024 // +0.0 vs 0.0 for all operators
2025 if ((Cmp->getPredicate() == CmpInst::Predicate::FCMP_ONE &&
2026 Cmp->getFastMathFlags().noNaNs()) ||
2027 Cmp->getPredicate() == CmpInst::Predicate::FCMP_UNE) {
2028 Value *LHS = Cmp->getOperand(0);
2029 Value *RHS = Cmp->getOperand(1);
2030 // If we can prove either side non-zero, then equality must imply
2031 // equivalence.
2032 // FIXME: We should do this optimization if 'no signed zeros' is
2033 // applicable via an instruction-level fast-math-flag or some other
2034 // indicator that relaxed FP semantics are being used.
2035 if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
2036 return true;
2037 if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
2038 return true;
2039 // TODO: Handle vector floating point constants
2040 }
2041 return false;
2042}
2043
2044
2045static bool hasUsersIn(Value *V, BasicBlock *BB) {
2046 return llvm::any_of(V->users(), [BB](User *U) {
2047 auto *I = dyn_cast<Instruction>(U);
2048 return I && I->getParent() == BB;
2049 });
2050}
2051
2052bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
2053 Value *V = IntrinsicI->getArgOperand(0);
2054
2055 if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
2056 if (Cond->isZero()) {
2057 Type *Int8Ty = Type::getInt8Ty(V->getContext());
2058 Type *PtrTy = PointerType::get(V->getContext(), 0);
2059 // Insert a new store to null instruction before the load to indicate that
2060 // this code is not reachable. FIXME: We could insert unreachable
2061 // instruction directly because we can modify the CFG.
2062 auto *NewS =
2064 IntrinsicI->getIterator());
2065 if (MSSAU) {
2066 const MemoryUseOrDef *FirstNonDom = nullptr;
2067 const auto *AL =
2068 MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent());
2069
2070 // If there are accesses in the current basic block, find the first one
2071 // that does not come before NewS. The new memory access is inserted
2072 // after the found access or before the terminator if no such access is
2073 // found.
2074 if (AL) {
2075 for (const auto &Acc : *AL) {
2076 if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc))
2077 if (!Current->getMemoryInst()->comesBefore(NewS)) {
2078 FirstNonDom = Current;
2079 break;
2080 }
2081 }
2082 }
2083
2084 auto *NewDef =
2085 FirstNonDom ? MSSAU->createMemoryAccessBefore(
2086 NewS, nullptr,
2087 const_cast<MemoryUseOrDef *>(FirstNonDom))
2088 : MSSAU->createMemoryAccessInBB(
2089 NewS, nullptr,
2090 NewS->getParent(), MemorySSA::BeforeTerminator);
2091
2092 MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
2093 }
2094 }
2095 if (isAssumeWithEmptyBundle(*IntrinsicI)) {
2096 markInstructionForDeletion(IntrinsicI);
2097 return true;
2098 }
2099 return false;
2100 }
2101
2102 if (isa<Constant>(V)) {
2103 // If it's not false, and constant, it must evaluate to true. This means our
2104 // assume is assume(true), and thus, pointless, and we don't want to do
2105 // anything more here.
2106 return false;
2107 }
2108
2109 Constant *True = ConstantInt::getTrue(V->getContext());
2110 bool Changed = false;
2111
2112 for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
2113 BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
2114
2115 // This property is only true in dominated successors, propagateEquality
2116 // will check dominance for us.
2117 Changed |= propagateEquality(V, True, Edge, false);
2118 }
2119
2120 // We can replace assume value with true, which covers cases like this:
2121 // call void @llvm.assume(i1 %cmp)
2122 // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
2123 ReplaceOperandsWithMap[V] = True;
2124
2125 // Similarly, after assume(!NotV) we know that NotV == false.
2126 Value *NotV;
2127 if (match(V, m_Not(m_Value(NotV))))
2128 ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext());
2129
2130 // If we find an equality fact, canonicalize all dominated uses in this block
2131 // to one of the two values. We heuristically choice the "oldest" of the
2132 // two where age is determined by value number. (Note that propagateEquality
2133 // above handles the cross block case.)
2134 //
2135 // Key case to cover are:
2136 // 1)
2137 // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
2138 // call void @llvm.assume(i1 %cmp)
2139 // ret float %0 ; will change it to ret float 3.000000e+00
2140 // 2)
2141 // %load = load float, float* %addr
2142 // %cmp = fcmp oeq float %load, %0
2143 // call void @llvm.assume(i1 %cmp)
2144 // ret float %load ; will change it to ret float %0
2145 if (auto *CmpI = dyn_cast<CmpInst>(V)) {
2146 if (impliesEquivalanceIfTrue(CmpI)) {
2147 Value *CmpLHS = CmpI->getOperand(0);
2148 Value *CmpRHS = CmpI->getOperand(1);
2149 // Heuristically pick the better replacement -- the choice of heuristic
2150 // isn't terribly important here, but the fact we canonicalize on some
2151 // replacement is for exposing other simplifications.
2152 // TODO: pull this out as a helper function and reuse w/existing
2153 // (slightly different) logic.
2154 if (isa<Constant>(CmpLHS) && !isa<Constant>(CmpRHS))
2155 std::swap(CmpLHS, CmpRHS);
2156 if (!isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))
2157 std::swap(CmpLHS, CmpRHS);
2158 if ((isa<Argument>(CmpLHS) && isa<Argument>(CmpRHS)) ||
2159 (isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))) {
2160 // Move the 'oldest' value to the right-hand side, using the value
2161 // number as a proxy for age.
2162 uint32_t LVN = VN.lookupOrAdd(CmpLHS);
2163 uint32_t RVN = VN.lookupOrAdd(CmpRHS);
2164 if (LVN < RVN)
2165 std::swap(CmpLHS, CmpRHS);
2166 }
2167
2168 // Handle degenerate case where we either haven't pruned a dead path or a
2169 // removed a trivial assume yet.
2170 if (isa<Constant>(CmpLHS) && isa<Constant>(CmpRHS))
2171 return Changed;
2172
2173 LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
2174 << *CmpLHS << " with "
2175 << *CmpRHS << " in block "
2176 << IntrinsicI->getParent()->getName() << "\n");
2177
2178
2179 // Setup the replacement map - this handles uses within the same block
2180 if (hasUsersIn(CmpLHS, IntrinsicI->getParent()))
2181 ReplaceOperandsWithMap[CmpLHS] = CmpRHS;
2182
2183 // NOTE: The non-block local cases are handled by the call to
2184 // propagateEquality above; this block is just about handling the block
2185 // local cases. TODO: There's a bunch of logic in propagateEqualiy which
2186 // isn't duplicated for the block local case, can we share it somehow?
2187 }
2188 }
2189 return Changed;
2190}
2191
2194 I->replaceAllUsesWith(Repl);
2195}
2196
2197/// Attempt to eliminate a load, first by eliminating it
2198/// locally, and then attempting non-local elimination if that fails.
2199bool GVNPass::processLoad(LoadInst *L) {
2200 if (!MD)
2201 return false;
2202
2203 // This code hasn't been audited for ordered or volatile memory access
2204 if (!L->isUnordered())
2205 return false;
2206
2207 if (L->use_empty()) {
2209 return true;
2210 }
2211
2212 // ... to a pointer that has been loaded from before...
2213 MemDepResult Dep = MD->getDependency(L);
2214
2215 // If it is defined in another block, try harder.
2216 if (Dep.isNonLocal())
2217 return processNonLocalLoad(L);
2218
2219 // Only handle the local case below
2220 if (!Dep.isLocal()) {
2221 // This might be a NonFuncLocal or an Unknown
2222 LLVM_DEBUG(
2223 // fast print dep, using operator<< on instruction is too slow.
2224 dbgs() << "GVN: load "; L->printAsOperand(dbgs());
2225 dbgs() << " has unknown dependence\n";);
2226 return false;
2227 }
2228
2229 auto AV = AnalyzeLoadAvailability(L, Dep, L->getPointerOperand());
2230 if (!AV)
2231 return false;
2232
2233 Value *AvailableValue = AV->MaterializeAdjustedValue(L, L, *this);
2234
2235 // MaterializeAdjustedValue is responsible for combining metadata.
2236 ICF->removeUsersOf(L);
2237 L->replaceAllUsesWith(AvailableValue);
2239 if (MSSAU)
2240 MSSAU->removeMemoryAccess(L);
2241 ++NumGVNLoad;
2243 // Tell MDA to reexamine the reused pointer since we might have more
2244 // information after forwarding it.
2245 if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
2247 return true;
2248}
2249
2250/// Return a pair the first field showing the value number of \p Exp and the
2251/// second field showing whether it is a value number newly created.
2252std::pair<uint32_t, bool>
2253GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
2254 uint32_t &e = expressionNumbering[Exp];
2255 bool CreateNewValNum = !e;
2256 if (CreateNewValNum) {
2257 Expressions.push_back(Exp);
2258 if (ExprIdx.size() < nextValueNumber + 1)
2259 ExprIdx.resize(nextValueNumber * 2);
2260 e = nextValueNumber;
2261 ExprIdx[nextValueNumber++] = nextExprNumber++;
2262 }
2263 return {e, CreateNewValNum};
2264}
2265
2266/// Return whether all the values related with the same \p num are
2267/// defined in \p BB.
2268bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
2269 GVNPass &Gvn) {
2270 return all_of(
2271 Gvn.LeaderTable.getLeaders(Num),
2272 [=](const LeaderMap::LeaderTableEntry &L) { return L.BB == BB; });
2273}
2274
2275/// Wrap phiTranslateImpl to provide caching functionality.
2276uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred,
2277 const BasicBlock *PhiBlock,
2278 uint32_t Num, GVNPass &Gvn) {
2279 auto FindRes = PhiTranslateTable.find({Num, Pred});
2280 if (FindRes != PhiTranslateTable.end())
2281 return FindRes->second;
2282 uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
2283 PhiTranslateTable.insert({{Num, Pred}, NewNum});
2284 return NewNum;
2285}
2286
2287// Return true if the value number \p Num and NewNum have equal value.
2288// Return false if the result is unknown.
2289bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
2290 const BasicBlock *Pred,
2291 const BasicBlock *PhiBlock,
2292 GVNPass &Gvn) {
2293 CallInst *Call = nullptr;
2294 auto Leaders = Gvn.LeaderTable.getLeaders(Num);
2295 for (const auto &Entry : Leaders) {
2296 Call = dyn_cast<CallInst>(Entry.Val);
2297 if (Call && Call->getParent() == PhiBlock)
2298 break;
2299 }
2300
2301 if (AA->doesNotAccessMemory(Call))
2302 return true;
2303
2304 if (!MD || !AA->onlyReadsMemory(Call))
2305 return false;
2306
2307 MemDepResult local_dep = MD->getDependency(Call);
2308 if (!local_dep.isNonLocal())
2309 return false;
2310
2312 MD->getNonLocalCallDependency(Call);
2313
2314 // Check to see if the Call has no function local clobber.
2315 for (const NonLocalDepEntry &D : deps) {
2316 if (D.getResult().isNonFuncLocal())
2317 return true;
2318 }
2319 return false;
2320}
2321
2322/// Translate value number \p Num using phis, so that it has the values of
2323/// the phis in BB.
2324uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
2325 const BasicBlock *PhiBlock,
2326 uint32_t Num, GVNPass &Gvn) {
2327 if (PHINode *PN = NumberingPhi[Num]) {
2328 for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
2329 if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
2330 if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
2331 return TransVal;
2332 }
2333 return Num;
2334 }
2335
2336 // If there is any value related with Num is defined in a BB other than
2337 // PhiBlock, it cannot depend on a phi in PhiBlock without going through
2338 // a backedge. We can do an early exit in that case to save compile time.
2339 if (!areAllValsInBB(Num, PhiBlock, Gvn))
2340 return Num;
2341
2342 if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
2343 return Num;
2344 Expression Exp = Expressions[ExprIdx[Num]];
2345
2346 for (unsigned i = 0; i < Exp.varargs.size(); i++) {
2347 // For InsertValue and ExtractValue, some varargs are index numbers
2348 // instead of value numbers. Those index numbers should not be
2349 // translated.
2350 if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
2351 (i > 0 && Exp.opcode == Instruction::ExtractValue) ||
2352 (i > 1 && Exp.opcode == Instruction::ShuffleVector))
2353 continue;
2354 Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
2355 }
2356
2357 if (Exp.commutative) {
2358 assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!");
2359 if (Exp.varargs[0] > Exp.varargs[1]) {
2360 std::swap(Exp.varargs[0], Exp.varargs[1]);
2361 uint32_t Opcode = Exp.opcode >> 8;
2362 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
2363 Exp.opcode = (Opcode << 8) |
2365 static_cast<CmpInst::Predicate>(Exp.opcode & 255));
2366 }
2367 }
2368
2369 if (uint32_t NewNum = expressionNumbering[Exp]) {
2370 if (Exp.opcode == Instruction::Call && NewNum != Num)
2371 return areCallValsEqual(Num, NewNum, Pred, PhiBlock, Gvn) ? NewNum : Num;
2372 return NewNum;
2373 }
2374 return Num;
2375}
2376
2377/// Erase stale entry from phiTranslate cache so phiTranslate can be computed
2378/// again.
2379void GVNPass::ValueTable::eraseTranslateCacheEntry(
2380 uint32_t Num, const BasicBlock &CurrBlock) {
2381 for (const BasicBlock *Pred : predecessors(&CurrBlock))
2382 PhiTranslateTable.erase({Num, Pred});
2383}
2384
2385// In order to find a leader for a given value number at a
2386// specific basic block, we first obtain the list of all Values for that number,
2387// and then scan the list to find one whose block dominates the block in
2388// question. This is fast because dominator tree queries consist of only
2389// a few comparisons of DFS numbers.
2390Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t num) {
2391 auto Leaders = LeaderTable.getLeaders(num);
2392 if (Leaders.empty())
2393 return nullptr;
2394
2395 Value *Val = nullptr;
2396 for (const auto &Entry : Leaders) {
2397 if (DT->dominates(Entry.BB, BB)) {
2398 Val = Entry.Val;
2399 if (isa<Constant>(Val))
2400 return Val;
2401 }
2402 }
2403
2404 return Val;
2405}
2406
2407/// There is an edge from 'Src' to 'Dst'. Return
2408/// true if every path from the entry block to 'Dst' passes via this edge. In
2409/// particular 'Dst' must not be reachable via another edge from 'Src'.
2411 DominatorTree *DT) {
2412 // While in theory it is interesting to consider the case in which Dst has
2413 // more than one predecessor, because Dst might be part of a loop which is
2414 // only reachable from Src, in practice it is pointless since at the time
2415 // GVN runs all such loops have preheaders, which means that Dst will have
2416 // been changed to have only one predecessor, namely Src.
2417 const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
2418 assert((!Pred || Pred == E.getStart()) &&
2419 "No edge between these basic blocks!");
2420 return Pred != nullptr;
2421}
2422
2423void GVNPass::assignBlockRPONumber(Function &F) {
2424 BlockRPONumber.clear();
2425 uint32_t NextBlockNumber = 1;
2427 for (BasicBlock *BB : RPOT)
2428 BlockRPONumber[BB] = NextBlockNumber++;
2429 InvalidBlockRPONumbers = false;
2430}
2431
2432bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const {
2433 bool Changed = false;
2434 for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
2435 Value *Operand = Instr->getOperand(OpNum);
2436 auto it = ReplaceOperandsWithMap.find(Operand);
2437 if (it != ReplaceOperandsWithMap.end()) {
2438 LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with "
2439 << *it->second << " in instruction " << *Instr << '\n');
2440 Instr->setOperand(OpNum, it->second);
2441 Changed = true;
2442 }
2443 }
2444 return Changed;
2445}
2446
2447/// The given values are known to be equal in every block
2448/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
2449/// 'RHS' everywhere in the scope. Returns whether a change was made.
2450/// If DominatesByEdge is false, then it means that we will propagate the RHS
2451/// value starting from the end of Root.Start.
2452bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
2453 const BasicBlockEdge &Root,
2454 bool DominatesByEdge) {
2456 Worklist.push_back(std::make_pair(LHS, RHS));
2457 bool Changed = false;
2458 // For speed, compute a conservative fast approximation to
2459 // DT->dominates(Root, Root.getEnd());
2460 const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
2461
2462 while (!Worklist.empty()) {
2463 std::pair<Value*, Value*> Item = Worklist.pop_back_val();
2464 LHS = Item.first; RHS = Item.second;
2465
2466 if (LHS == RHS)
2467 continue;
2468 assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
2469
2470 // Don't try to propagate equalities between constants.
2471 if (isa<Constant>(LHS) && isa<Constant>(RHS))
2472 continue;
2473
2474 // Prefer a constant on the right-hand side, or an Argument if no constants.
2475 if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
2476 std::swap(LHS, RHS);
2477 assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
2478 const DataLayout &DL =
2479 isa<Argument>(LHS)
2480 ? cast<Argument>(LHS)->getParent()->getDataLayout()
2481 : cast<Instruction>(LHS)->getDataLayout();
2482
2483 // If there is no obvious reason to prefer the left-hand side over the
2484 // right-hand side, ensure the longest lived term is on the right-hand side,
2485 // so the shortest lived term will be replaced by the longest lived.
2486 // This tends to expose more simplifications.
2487 uint32_t LVN = VN.lookupOrAdd(LHS);
2488 if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
2489 (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
2490 // Move the 'oldest' value to the right-hand side, using the value number
2491 // as a proxy for age.
2492 uint32_t RVN = VN.lookupOrAdd(RHS);
2493 if (LVN < RVN) {
2494 std::swap(LHS, RHS);
2495 LVN = RVN;
2496 }
2497 }
2498
2499 // If value numbering later sees that an instruction in the scope is equal
2500 // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
2501 // the invariant that instructions only occur in the leader table for their
2502 // own value number (this is used by removeFromLeaderTable), do not do this
2503 // if RHS is an instruction (if an instruction in the scope is morphed into
2504 // LHS then it will be turned into RHS by the next GVN iteration anyway, so
2505 // using the leader table is about compiling faster, not optimizing better).
2506 // The leader table only tracks basic blocks, not edges. Only add to if we
2507 // have the simple case where the edge dominates the end.
2508 if (RootDominatesEnd && !isa<Instruction>(RHS) &&
2509 canReplacePointersIfEqual(LHS, RHS, DL))
2510 LeaderTable.insert(LVN, RHS, Root.getEnd());
2511
2512 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
2513 // LHS always has at least one use that is not dominated by Root, this will
2514 // never do anything if LHS has only one use.
2515 if (!LHS->hasOneUse()) {
2516 // Create a callback that captures the DL.
2517 auto canReplacePointersCallBack = [&DL](const Use &U, const Value *To) {
2518 return canReplacePointersInUseIfEqual(U, To, DL);
2519 };
2520 unsigned NumReplacements =
2521 DominatesByEdge
2522 ? replaceDominatedUsesWithIf(LHS, RHS, *DT, Root,
2523 canReplacePointersCallBack)
2524 : replaceDominatedUsesWithIf(LHS, RHS, *DT, Root.getStart(),
2525 canReplacePointersCallBack);
2526
2527 if (NumReplacements > 0) {
2528 Changed = true;
2529 NumGVNEqProp += NumReplacements;
2530 // Cached information for anything that uses LHS will be invalid.
2531 if (MD)
2533 }
2534 }
2535
2536 // Now try to deduce additional equalities from this one. For example, if
2537 // the known equality was "(A != B)" == "false" then it follows that A and B
2538 // are equal in the scope. Only boolean equalities with an explicit true or
2539 // false RHS are currently supported.
2540 if (!RHS->getType()->isIntegerTy(1))
2541 // Not a boolean equality - bail out.
2542 continue;
2543 ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
2544 if (!CI)
2545 // RHS neither 'true' nor 'false' - bail out.
2546 continue;
2547 // Whether RHS equals 'true'. Otherwise it equals 'false'.
2548 bool isKnownTrue = CI->isMinusOne();
2549 bool isKnownFalse = !isKnownTrue;
2550
2551 // If "A && B" is known true then both A and B are known true. If "A || B"
2552 // is known false then both A and B are known false.
2553 Value *A, *B;
2554 if ((isKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) ||
2555 (isKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) {
2556 Worklist.push_back(std::make_pair(A, RHS));
2557 Worklist.push_back(std::make_pair(B, RHS));
2558 continue;
2559 }
2560
2561 // If we are propagating an equality like "(A == B)" == "true" then also
2562 // propagate the equality A == B. When propagating a comparison such as
2563 // "(A >= B)" == "true", replace all instances of "A < B" with "false".
2564 if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) {
2565 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
2566
2567 // If "A == B" is known true, or "A != B" is known false, then replace
2568 // A with B everywhere in the scope. For floating point operations, we
2569 // have to be careful since equality does not always imply equivalance.
2570 if ((isKnownTrue && impliesEquivalanceIfTrue(Cmp)) ||
2571 (isKnownFalse && impliesEquivalanceIfFalse(Cmp)))
2572 Worklist.push_back(std::make_pair(Op0, Op1));
2573
2574 // If "A >= B" is known true, replace "A < B" with false everywhere.
2575 CmpInst::Predicate NotPred = Cmp->getInversePredicate();
2576 Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
2577 // Since we don't have the instruction "A < B" immediately to hand, work
2578 // out the value number that it would have and use that to find an
2579 // appropriate instruction (if any).
2580 uint32_t NextNum = VN.getNextUnusedValueNumber();
2581 uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1);
2582 // If the number we were assigned was brand new then there is no point in
2583 // looking for an instruction realizing it: there cannot be one!
2584 if (Num < NextNum) {
2585 Value *NotCmp = findLeader(Root.getEnd(), Num);
2586 if (NotCmp && isa<Instruction>(NotCmp)) {
2587 unsigned NumReplacements =
2588 DominatesByEdge
2589 ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
2590 : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
2591 Root.getStart());
2592 Changed |= NumReplacements > 0;
2593 NumGVNEqProp += NumReplacements;
2594 // Cached information for anything that uses NotCmp will be invalid.
2595 if (MD)
2596 MD->invalidateCachedPointerInfo(NotCmp);
2597 }
2598 }
2599 // Ensure that any instruction in scope that gets the "A < B" value number
2600 // is replaced with false.
2601 // The leader table only tracks basic blocks, not edges. Only add to if we
2602 // have the simple case where the edge dominates the end.
2603 if (RootDominatesEnd)
2604 LeaderTable.insert(Num, NotVal, Root.getEnd());
2605
2606 continue;
2607 }
2608 }
2609
2610 return Changed;
2611}
2612
2613/// When calculating availability, handle an instruction
2614/// by inserting it into the appropriate sets
2615bool GVNPass::processInstruction(Instruction *I) {
2616 // Ignore dbg info intrinsics.
2617 if (isa<DbgInfoIntrinsic>(I))
2618 return false;
2619
2620 // If the instruction can be easily simplified then do so now in preference
2621 // to value numbering it. Value numbering often exposes redundancies, for
2622 // example if it determines that %y is equal to %x then the instruction
2623 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
2624 const DataLayout &DL = I->getDataLayout();
2625 if (Value *V = simplifyInstruction(I, {DL, TLI, DT, AC})) {
2626 bool Changed = false;
2627 if (!I->use_empty()) {
2628 // Simplification can cause a special instruction to become not special.
2629 // For example, devirtualization to a willreturn function.
2630 ICF->removeUsersOf(I);
2631 I->replaceAllUsesWith(V);
2632 Changed = true;
2633 }
2634 if (isInstructionTriviallyDead(I, TLI)) {
2636 Changed = true;
2637 }
2638 if (Changed) {
2639 if (MD && V->getType()->isPtrOrPtrVectorTy())
2641 ++NumGVNSimpl;
2642 return true;
2643 }
2644 }
2645
2646 if (auto *Assume = dyn_cast<AssumeInst>(I))
2647 return processAssumeIntrinsic(Assume);
2648
2649 if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
2650 if (processLoad(Load))
2651 return true;
2652
2653 unsigned Num = VN.lookupOrAdd(Load);
2654 LeaderTable.insert(Num, Load, Load->getParent());
2655 return false;
2656 }
2657
2658 // For conditional branches, we can perform simple conditional propagation on
2659 // the condition value itself.
2660 if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
2661 if (!BI->isConditional())
2662 return false;
2663
2664 if (isa<Constant>(BI->getCondition()))
2665 return processFoldableCondBr(BI);
2666
2667 Value *BranchCond = BI->getCondition();
2668 BasicBlock *TrueSucc = BI->getSuccessor(0);
2669 BasicBlock *FalseSucc = BI->getSuccessor(1);
2670 // Avoid multiple edges early.
2671 if (TrueSucc == FalseSucc)
2672 return false;
2673
2674 BasicBlock *Parent = BI->getParent();
2675 bool Changed = false;
2676
2678 BasicBlockEdge TrueE(Parent, TrueSucc);
2679 Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);
2680
2682 BasicBlockEdge FalseE(Parent, FalseSucc);
2683 Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);
2684
2685 return Changed;
2686 }
2687
2688 // For switches, propagate the case values into the case destinations.
2689 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
2690 Value *SwitchCond = SI->getCondition();
2691 BasicBlock *Parent = SI->getParent();
2692 bool Changed = false;
2693
2694 // Remember how many outgoing edges there are to every successor.
2696 for (BasicBlock *Succ : successors(Parent))
2697 ++SwitchEdges[Succ];
2698
2699 for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
2700 i != e; ++i) {
2701 BasicBlock *Dst = i->getCaseSuccessor();
2702 // If there is only a single edge, propagate the case value into it.
2703 if (SwitchEdges.lookup(Dst) == 1) {
2704 BasicBlockEdge E(Parent, Dst);
2705 Changed |= propagateEquality(SwitchCond, i->getCaseValue(), E, true);
2706 }
2707 }
2708 return Changed;
2709 }
2710
2711 // Instructions with void type don't return a value, so there's
2712 // no point in trying to find redundancies in them.
2713 if (I->getType()->isVoidTy())
2714 return false;
2715
2716 uint32_t NextNum = VN.getNextUnusedValueNumber();
2717 unsigned Num = VN.lookupOrAdd(I);
2718
2719 // Allocations are always uniquely numbered, so we can save time and memory
2720 // by fast failing them.
2721 if (isa<AllocaInst>(I) || I->isTerminator() || isa<PHINode>(I)) {
2722 LeaderTable.insert(Num, I, I->getParent());
2723 return false;
2724 }
2725
2726 // If the number we were assigned was a brand new VN, then we don't
2727 // need to do a lookup to see if the number already exists
2728 // somewhere in the domtree: it can't!
2729 if (Num >= NextNum) {
2730 LeaderTable.insert(Num, I, I->getParent());
2731 return false;
2732 }
2733
2734 // Perform fast-path value-number based elimination of values inherited from
2735 // dominators.
2736 Value *Repl = findLeader(I->getParent(), Num);
2737 if (!Repl) {
2738 // Failure, just remember this instance for future use.
2739 LeaderTable.insert(Num, I, I->getParent());
2740 return false;
2741 }
2742
2743 if (Repl == I) {
2744 // If I was the result of a shortcut PRE, it might already be in the table
2745 // and the best replacement for itself. Nothing to do.
2746 return false;
2747 }
2748
2749 // Remove it!
2751 if (MD && Repl->getType()->isPtrOrPtrVectorTy())
2754 return true;
2755}
2756
2757/// runOnFunction - This is the main transformation entry point for a function.
2758bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
2759 const TargetLibraryInfo &RunTLI, AAResults &RunAA,
2761 OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
2762 AC = &RunAC;
2763 DT = &RunDT;
2764 VN.setDomTree(DT);
2765 TLI = &RunTLI;
2766 VN.setAliasAnalysis(&RunAA);
2767 MD = RunMD;
2768 ImplicitControlFlowTracking ImplicitCFT;
2769 ICF = &ImplicitCFT;
2770 this->LI = &LI;
2771 VN.setMemDep(MD);
2772 ORE = RunORE;
2773 InvalidBlockRPONumbers = true;
2774 MemorySSAUpdater Updater(MSSA);
2775 MSSAU = MSSA ? &Updater : nullptr;
2776
2777 bool Changed = false;
2778 bool ShouldContinue = true;
2779
2780 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
2781 // Merge unconditional branches, allowing PRE to catch more
2782 // optimization opportunities.
2784 bool removedBlock = MergeBlockIntoPredecessor(&BB, &DTU, &LI, MSSAU, MD);
2785 if (removedBlock)
2786 ++NumGVNBlocks;
2787
2788 Changed |= removedBlock;
2789 }
2790 DTU.flush();
2791
2792 unsigned Iteration = 0;
2793 while (ShouldContinue) {
2794 LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
2795 (void) Iteration;
2796 ShouldContinue = iterateOnFunction(F);
2797 Changed |= ShouldContinue;
2798 ++Iteration;
2799 }
2800
2801 if (isPREEnabled()) {
2802 // Fabricate val-num for dead-code in order to suppress assertion in
2803 // performPRE().
2804 assignValNumForDeadCode();
2805 bool PREChanged = true;
2806 while (PREChanged) {
2807 PREChanged = performPRE(F);
2808 Changed |= PREChanged;
2809 }
2810 }
2811
2812 // FIXME: Should perform GVN again after PRE does something. PRE can move
2813 // computations into blocks where they become fully redundant. Note that
2814 // we can't do this until PRE's critical edge splitting updates memdep.
2815 // Actually, when this happens, we should just fully integrate PRE into GVN.
2816
2817 cleanupGlobalSets();
2818 // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
2819 // iteration.
2820 DeadBlocks.clear();
2821
2822 if (MSSA && VerifyMemorySSA)
2823 MSSA->verifyMemorySSA();
2824
2825 return Changed;
2826}
2827
2828bool GVNPass::processBlock(BasicBlock *BB) {
2829 // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
2830 // (and incrementing BI before processing an instruction).
2831 assert(InstrsToErase.empty() &&
2832 "We expect InstrsToErase to be empty across iterations");
2833 if (DeadBlocks.count(BB))
2834 return false;
2835
2836 // Clearing map before every BB because it can be used only for single BB.
2837 ReplaceOperandsWithMap.clear();
2838 bool ChangedFunction = false;
2839
2840 // Since we may not have visited the input blocks of the phis, we can't
2841 // use our normal hash approach for phis. Instead, simply look for
2842 // obvious duplicates. The first pass of GVN will tend to create
2843 // identical phis, and the second or later passes can eliminate them.
2844 SmallPtrSet<PHINode *, 8> PHINodesToRemove;
2845 ChangedFunction |= EliminateDuplicatePHINodes(BB, PHINodesToRemove);
2846 for (PHINode *PN : PHINodesToRemove) {
2847 VN.erase(PN);
2848 removeInstruction(PN);
2849 }
2850
2851 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
2852 BI != BE;) {
2853 if (!ReplaceOperandsWithMap.empty())
2854 ChangedFunction |= replaceOperandsForInBlockEquality(&*BI);
2855 ChangedFunction |= processInstruction(&*BI);
2856
2857 if (InstrsToErase.empty()) {
2858 ++BI;
2859 continue;
2860 }
2861
2862 // If we need some instructions deleted, do it now.
2863 NumGVNInstr += InstrsToErase.size();
2864
2865 // Avoid iterator invalidation.
2866 bool AtStart = BI == BB->begin();
2867 if (!AtStart)
2868 --BI;
2869
2870 for (auto *I : InstrsToErase) {
2871 assert(I->getParent() == BB && "Removing instruction from wrong block?");
2872 LLVM_DEBUG(dbgs() << "GVN removed: " << *I << '\n');
2873 salvageKnowledge(I, AC);
2875 removeInstruction(I);
2876 }
2877 InstrsToErase.clear();
2878
2879 if (AtStart)
2880 BI = BB->begin();
2881 else
2882 ++BI;
2883 }
2884
2885 return ChangedFunction;
2886}
2887
2888// Instantiate an expression in a predecessor that lacked it.
2889bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
2890 BasicBlock *Curr, unsigned int ValNo) {
2891 // Because we are going top-down through the block, all value numbers
2892 // will be available in the predecessor by the time we need them. Any
2893 // that weren't originally present will have been instantiated earlier
2894 // in this loop.
2895 bool success = true;
2896 for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) {
2897 Value *Op = Instr->getOperand(i);
2898 if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
2899 continue;
2900 // This could be a newly inserted instruction, in which case, we won't
2901 // find a value number, and should give up before we hurt ourselves.
2902 // FIXME: Rewrite the infrastructure to let it easier to value number
2903 // and process newly inserted instructions.
2904 if (!VN.exists(Op)) {
2905 success = false;
2906 break;
2907 }
2908 uint32_t TValNo =
2909 VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this);
2910 if (Value *V = findLeader(Pred, TValNo)) {
2911 Instr->setOperand(i, V);
2912 } else {
2913 success = false;
2914 break;
2915 }
2916 }
2917
2918 // Fail out if we encounter an operand that is not available in
2919 // the PRE predecessor. This is typically because of loads which
2920 // are not value numbered precisely.
2921 if (!success)
2922 return false;
2923
2924 Instr->insertBefore(Pred->getTerminator());
2925 Instr->setName(Instr->getName() + ".pre");
2926 Instr->setDebugLoc(Instr->getDebugLoc());
2927
2928 ICF->insertInstructionTo(Instr, Pred);
2929
2930 unsigned Num = VN.lookupOrAdd(Instr);
2931 VN.add(Instr, Num);
2932
2933 // Update the availability map to include the new instruction.
2934 LeaderTable.insert(Num, Instr, Pred);
2935 return true;
2936}
2937
2938bool GVNPass::performScalarPRE(Instruction *CurInst) {
2939 if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
2940 isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
2941 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
2942 isa<DbgInfoIntrinsic>(CurInst))
2943 return false;
2944
2945 // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
2946 // sinking the compare again, and it would force the code generator to
2947 // move the i1 from processor flags or predicate registers into a general
2948 // purpose register.
2949 if (isa<CmpInst>(CurInst))
2950 return false;
2951
2952 // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
2953 // sinking the addressing mode computation back to its uses. Extending the
2954 // GEP's live range increases the register pressure, and therefore it can
2955 // introduce unnecessary spills.
2956 //
2957 // This doesn't prevent Load PRE. PHI translation will make the GEP available
2958 // to the load by moving it to the predecessor block if necessary.
2959 if (isa<GetElementPtrInst>(CurInst))
2960 return false;
2961
2962 if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
2963 // We don't currently value number ANY inline asm calls.
2964 if (CallB->isInlineAsm())
2965 return false;
2966 }
2967
2968 uint32_t ValNo = VN.lookup(CurInst);
2969
2970 // Look for the predecessors for PRE opportunities. We're
2971 // only trying to solve the basic diamond case, where
2972 // a value is computed in the successor and one predecessor,
2973 // but not the other. We also explicitly disallow cases
2974 // where the successor is its own predecessor, because they're
2975 // more complicated to get right.
2976 unsigned NumWith = 0;
2977 unsigned NumWithout = 0;
2978 BasicBlock *PREPred = nullptr;
2979 BasicBlock *CurrentBlock = CurInst->getParent();
2980
2981 // Update the RPO numbers for this function.
2982 if (InvalidBlockRPONumbers)
2983 assignBlockRPONumber(*CurrentBlock->getParent());
2984
2986 for (BasicBlock *P : predecessors(CurrentBlock)) {
2987 // We're not interested in PRE where blocks with predecessors that are
2988 // not reachable.
2989 if (!DT->isReachableFromEntry(P)) {
2990 NumWithout = 2;
2991 break;
2992 }
2993 // It is not safe to do PRE when P->CurrentBlock is a loop backedge.
2994 assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
2995 "Invalid BlockRPONumber map.");
2996 if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) {
2997 NumWithout = 2;
2998 break;
2999 }
3000
3001 uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
3002 Value *predV = findLeader(P, TValNo);
3003 if (!predV) {
3004 predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
3005 PREPred = P;
3006 ++NumWithout;
3007 } else if (predV == CurInst) {
3008 /* CurInst dominates this predecessor. */
3009 NumWithout = 2;
3010 break;
3011 } else {
3012 predMap.push_back(std::make_pair(predV, P));
3013 ++NumWith;
3014 }
3015 }
3016
3017 // Don't do PRE when it might increase code size, i.e. when
3018 // we would need to insert instructions in more than one pred.
3019 if (NumWithout > 1 || NumWith == 0)
3020 return false;
3021
3022 // We may have a case where all predecessors have the instruction,
3023 // and we just need to insert a phi node. Otherwise, perform
3024 // insertion.
3025 Instruction *PREInstr = nullptr;
3026
3027 if (NumWithout != 0) {
3028 if (!isSafeToSpeculativelyExecute(CurInst)) {
3029 // It is only valid to insert a new instruction if the current instruction
3030 // is always executed. An instruction with implicit control flow could
3031 // prevent us from doing it. If we cannot speculate the execution, then
3032 // PRE should be prohibited.
3033 if (ICF->isDominatedByICFIFromSameBlock(CurInst))
3034 return false;
3035 }
3036
3037 // Don't do PRE across indirect branch.
3038 if (isa<IndirectBrInst>(PREPred->getTerminator()))
3039 return false;
3040
3041 // We can't do PRE safely on a critical edge, so instead we schedule
3042 // the edge to be split and perform the PRE the next time we iterate
3043 // on the function.
3044 unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
3045 if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
3046 toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
3047 return false;
3048 }
3049 // We need to insert somewhere, so let's give it a shot
3050 PREInstr = CurInst->clone();
3051 if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
3052 // If we failed insertion, make sure we remove the instruction.
3053#ifndef NDEBUG
3054 verifyRemoved(PREInstr);
3055#endif
3056 PREInstr->deleteValue();
3057 return false;
3058 }
3059 }
3060
3061 // Either we should have filled in the PRE instruction, or we should
3062 // not have needed insertions.
3063 assert(PREInstr != nullptr || NumWithout == 0);
3064
3065 ++NumGVNPRE;
3066
3067 // Create a PHI to make the value available in this block.
3068 PHINode *Phi = PHINode::Create(CurInst->getType(), predMap.size(),
3069 CurInst->getName() + ".pre-phi");
3070 Phi->insertBefore(CurrentBlock->begin());
3071 for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
3072 if (Value *V = predMap[i].first) {
3073 // If we use an existing value in this phi, we have to patch the original
3074 // value because the phi will be used to replace a later value.
3075 patchReplacementInstruction(CurInst, V);
3076 Phi->addIncoming(V, predMap[i].second);
3077 } else
3078 Phi->addIncoming(PREInstr, PREPred);
3079 }
3080
3081 VN.add(Phi, ValNo);
3082 // After creating a new PHI for ValNo, the phi translate result for ValNo will
3083 // be changed, so erase the related stale entries in phi translate cache.
3084 VN.eraseTranslateCacheEntry(ValNo, *CurrentBlock);
3085 LeaderTable.insert(ValNo, Phi, CurrentBlock);
3086 Phi->setDebugLoc(CurInst->getDebugLoc());
3087 CurInst->replaceAllUsesWith(Phi);
3088 if (MD && Phi->getType()->isPtrOrPtrVectorTy())
3090 VN.erase(CurInst);
3091 LeaderTable.erase(ValNo, CurInst, CurrentBlock);
3092
3093 LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
3094 removeInstruction(CurInst);
3095 ++NumGVNInstr;
3096
3097 return true;
3098}
3099
3100/// Perform a purely local form of PRE that looks for diamond
3101/// control flow patterns and attempts to perform simple PRE at the join point.
3102bool GVNPass::performPRE(Function &F) {
3103 bool Changed = false;
3104 for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
3105 // Nothing to PRE in the entry block.
3106 if (CurrentBlock == &F.getEntryBlock())
3107 continue;
3108
3109 // Don't perform PRE on an EH pad.
3110 if (CurrentBlock->isEHPad())
3111 continue;
3112
3113 for (BasicBlock::iterator BI = CurrentBlock->begin(),
3114 BE = CurrentBlock->end();
3115 BI != BE;) {
3116 Instruction *CurInst = &*BI++;
3117 Changed |= performScalarPRE(CurInst);
3118 }
3119 }
3120
3121 if (splitCriticalEdges())
3122 Changed = true;
3123
3124 return Changed;
3125}
3126
3127/// Split the critical edge connecting the given two blocks, and return
3128/// the block inserted to the critical edge.
3129BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
3130 // GVN does not require loop-simplify, do not try to preserve it if it is not
3131 // possible.
3133 Pred, Succ,
3134 CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
3135 if (BB) {
3136 if (MD)
3138 InvalidBlockRPONumbers = true;
3139 }
3140 return BB;
3141}
3142
3143/// Split critical edges found during the previous
3144/// iteration that may enable further optimization.
3145bool GVNPass::splitCriticalEdges() {
3146 if (toSplit.empty())
3147 return false;
3148
3149 bool Changed = false;
3150 do {
3151 std::pair<Instruction *, unsigned> Edge = toSplit.pop_back_val();
3152 Changed |= SplitCriticalEdge(Edge.first, Edge.second,
3153 CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
3154 nullptr;
3155 } while (!toSplit.empty());
3156 if (Changed) {
3157 if (MD)
3159 InvalidBlockRPONumbers = true;
3160 }
3161 return Changed;
3162}
3163
3164/// Executes one iteration of GVN
3165bool GVNPass::iterateOnFunction(Function &F) {
3166 cleanupGlobalSets();
3167
3168 // Top-down walk of the dominator tree
3169 bool Changed = false;
3170 // Needed for value numbering with phi construction to work.
3171 // RPOT walks the graph in its constructor and will not be invalidated during
3172 // processBlock.
3174
3175 for (BasicBlock *BB : RPOT)
3176 Changed |= processBlock(BB);
3177
3178 return Changed;
3179}
3180
3181void GVNPass::cleanupGlobalSets() {
3182 VN.clear();
3183 LeaderTable.clear();
3184 BlockRPONumber.clear();
3185 ICF->clear();
3186 InvalidBlockRPONumbers = true;
3187}
3188
3189void GVNPass::removeInstruction(Instruction *I) {
3190 if (MD) MD->removeInstruction(I);
3191 if (MSSAU)
3192 MSSAU->removeMemoryAccess(I);
3193#ifndef NDEBUG
3194 verifyRemoved(I);
3195#endif
3196 ICF->removeInstruction(I);
3197 I->eraseFromParent();
3198}
3199
3200/// Verify that the specified instruction does not occur in our
3201/// internal data structures.
3202void GVNPass::verifyRemoved(const Instruction *Inst) const {
3203 VN.verifyRemoved(Inst);
3204 LeaderTable.verifyRemoved(Inst);
3205}
3206
3207/// BB is declared dead, which implied other blocks become dead as well. This
3208/// function is to add all these blocks to "DeadBlocks". For the dead blocks'
3209/// live successors, update their phi nodes by replacing the operands
3210/// corresponding to dead blocks with UndefVal.
3211void GVNPass::addDeadBlock(BasicBlock *BB) {
3214
3215 NewDead.push_back(BB);
3216 while (!NewDead.empty()) {
3217 BasicBlock *D = NewDead.pop_back_val();
3218 if (DeadBlocks.count(D))
3219 continue;
3220
3221 // All blocks dominated by D are dead.
3223 DT->getDescendants(D, Dom);
3224 DeadBlocks.insert(Dom.begin(), Dom.end());
3225
3226 // Figure out the dominance-frontier(D).
3227 for (BasicBlock *B : Dom) {
3228 for (BasicBlock *S : successors(B)) {
3229 if (DeadBlocks.count(S))
3230 continue;
3231
3232 bool AllPredDead = true;
3233 for (BasicBlock *P : predecessors(S))
3234 if (!DeadBlocks.count(P)) {
3235 AllPredDead = false;
3236 break;
3237 }
3238
3239 if (!AllPredDead) {
3240 // S could be proved dead later on. That is why we don't update phi
3241 // operands at this moment.
3242 DF.insert(S);
3243 } else {
3244 // While S is not dominated by D, it is dead by now. This could take
3245 // place if S already have a dead predecessor before D is declared
3246 // dead.
3247 NewDead.push_back(S);
3248 }
3249 }
3250 }
3251 }
3252
3253 // For the dead blocks' live successors, update their phi nodes by replacing
3254 // the operands corresponding to dead blocks with UndefVal.
3255 for (BasicBlock *B : DF) {
3256 if (DeadBlocks.count(B))
3257 continue;
3258
3259 // First, split the critical edges. This might also create additional blocks
3260 // to preserve LoopSimplify form and adjust edges accordingly.
3262 for (BasicBlock *P : Preds) {
3263 if (!DeadBlocks.count(P))
3264 continue;
3265
3267 isCriticalEdge(P->getTerminator(), B)) {
3268 if (BasicBlock *S = splitCriticalEdges(P, B))
3269 DeadBlocks.insert(P = S);
3270 }
3271 }
3272
3273 // Now poison the incoming values from the dead predecessors.
3274 for (BasicBlock *P : predecessors(B)) {
3275 if (!DeadBlocks.count(P))
3276 continue;
3277 for (PHINode &Phi : B->phis()) {
3278 Phi.setIncomingValueForBlock(P, PoisonValue::get(Phi.getType()));
3279 if (MD)
3281 }
3282 }
3283 }
3284}
3285
3286// If the given branch is recognized as a foldable branch (i.e. conditional
3287// branch with constant condition), it will perform following analyses and
3288// transformation.
3289// 1) If the dead out-coming edge is a critical-edge, split it. Let
3290// R be the target of the dead out-coming edge.
3291// 1) Identify the set of dead blocks implied by the branch's dead outcoming
3292// edge. The result of this step will be {X| X is dominated by R}
3293// 2) Identify those blocks which haves at least one dead predecessor. The
3294// result of this step will be dominance-frontier(R).
3295// 3) Update the PHIs in DF(R) by replacing the operands corresponding to
3296// dead blocks with "UndefVal" in an hope these PHIs will optimized away.
3297//
3298// Return true iff *NEW* dead code are found.
3299bool GVNPass::processFoldableCondBr(BranchInst *BI) {
3300 if (!BI || BI->isUnconditional())
3301 return false;
3302
3303 // If a branch has two identical successors, we cannot declare either dead.
3304 if (BI->getSuccessor(0) == BI->getSuccessor(1))
3305 return false;
3306
3307 ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
3308 if (!Cond)
3309 return false;
3310
3311 BasicBlock *DeadRoot =
3312 Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
3313 if (DeadBlocks.count(DeadRoot))
3314 return false;
3315
3316 if (!DeadRoot->getSinglePredecessor())
3317 DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);
3318
3319 addDeadBlock(DeadRoot);
3320 return true;
3321}
3322
3323// performPRE() will trigger assert if it comes across an instruction without
3324// associated val-num. As it normally has far more live instructions than dead
3325// instructions, it makes more sense just to "fabricate" a val-number for the
3326// dead code than checking if instruction involved is dead or not.
3327void GVNPass::assignValNumForDeadCode() {
3328 for (BasicBlock *BB : DeadBlocks) {
3329 for (Instruction &Inst : *BB) {
3330 unsigned ValNum = VN.lookupOrAdd(&Inst);
3331 LeaderTable.insert(ValNum, &Inst, BB);
3332 }
3333 }
3334}
3335
3337public:
3338 static char ID; // Pass identification, replacement for typeid
3339
3340 explicit GVNLegacyPass(bool NoMemDepAnalysis = !GVNEnableMemDep)
3341 : FunctionPass(ID), Impl(GVNOptions().setMemDep(!NoMemDepAnalysis)) {
3343 }
3344
3345 bool runOnFunction(Function &F) override {
3346 if (skipFunction(F))
3347 return false;
3348
3349 auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
3350 return Impl.runImpl(
3351 F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
3352 getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
3353 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
3354 getAnalysis<AAResultsWrapperPass>().getAAResults(),
3355 Impl.isMemDepEnabled()
3356 ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
3357 : nullptr,
3358 getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
3359 &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
3360 MSSAWP ? &MSSAWP->getMSSA() : nullptr);
3361 }
3362
3363 void getAnalysisUsage(AnalysisUsage &AU) const override {
3368 if (Impl.isMemDepEnabled())
3377 }
3378
3379private:
3380 GVNPass Impl;
3381};
3382
3383char GVNLegacyPass::ID = 0;
3384
3385INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
3394
3395// The public interface to this file...
3396FunctionPass *llvm::createGVNPass(bool NoMemDepAnalysis) {
3397 return new GVNLegacyPass(NoMemDepAnalysis);
3398}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:533
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static RegisterPass< DebugifyFunctionPass > DF("debugify-function", "Attach debug info to a function")
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
static bool hasUsersIn(Value *V, BasicBlock *BB)
Definition: GVN.cpp:2045
static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo, DominatorTree *DT, OptimizationRemarkEmitter *ORE)
Try to locate the three instruction involved in a missed load-elimination case that is due to an inte...
Definition: GVN.cpp:1151
static void reportLoadElim(LoadInst *Load, Value *AvailableValue, OptimizationRemarkEmitter *ORE)
Definition: GVN.cpp:1888
static cl::opt< uint32_t > MaxNumInsnsPerBlock("gvn-max-num-insns", cl::Hidden, cl::init(100), cl::desc("Max number of instructions to scan in each basic block in GVN " "(default = 100)"))
static cl::opt< bool > GVNEnableMemDep("enable-gvn-memdep", cl::init(true))
static cl::opt< bool > GVNEnableLoadInLoopPRE("enable-load-in-loop-pre", cl::init(true))
static cl::opt< uint32_t > MaxNumDeps("gvn-max-num-deps", cl::Hidden, cl::init(100), cl::desc("Max number of dependences to attempt Load PRE (default = 100)"))
static Value * ConstructSSAForLoadSet(LoadInst *Load, SmallVectorImpl< AvailableValueInBlock > &ValuesPerBlock, GVNPass &gvn)
Given a set of loads specified by ValuesPerBlock, construct SSA form, allowing us to eliminate Load.
Definition: GVN.cpp:1027
static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, DominatorTree *DT)
There is an edge from 'Src' to 'Dst'.
Definition: GVN.cpp:2410
static bool impliesEquivalanceIfFalse(CmpInst *Cmp)
Definition: GVN.cpp:2018
static bool IsValueFullyAvailableInBlock(BasicBlock *BB, DenseMap< BasicBlock *, AvailabilityState > &FullyAvailableBlocks)
Return true if we can prove that the value we're analyzing is fully available in the specified block.
Definition: GVN.cpp:892
static Value * findDominatingValue(const MemoryLocation &Loc, Type *LoadTy, Instruction *From, AAResults *AA)
Definition: GVN.cpp:1213
static bool isLifetimeStart(const Instruction *Inst)
Definition: GVN.cpp:1132
static cl::opt< bool > GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre", cl::init(false))
static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl)
Definition: GVN.cpp:2192
static void replaceValuesPerBlockEntry(SmallVectorImpl< AvailableValueInBlock > &ValuesPerBlock, Value *OldValue, Value *NewValue)
If the specified OldValue exists in ValuesPerBlock, replace its value with NewValue.
Definition: GVN.cpp:1008
static bool impliesEquivalanceIfTrue(CmpInst *Cmp)
Definition: GVN.cpp:1992
AvailabilityState
Definition: GVN.cpp:872
@ Unavailable
We know the block is not fully available. This is a fixpoint.
@ Available
We know the block is fully available. This is a fixpoint.
@ SpeculativelyAvailable
We do not know whether the block is fully available or not, but we are currently speculating that it ...
static cl::opt< bool > GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden)
static cl::opt< uint32_t > MaxNumVisitedInsts("gvn-max-num-visited-insts", cl::Hidden, cl::init(100), cl::desc("Max number of visited instructions when trying to find " "dominating value of select dependency (default = 100)"))
static cl::opt< uint32_t > MaxBBSpeculations("gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::desc("Max number of blocks we're willing to speculate on (and recurse " "into) when deducing if a value is fully available or not in GVN " "(default = 600)"))
static bool liesBetween(const Instruction *From, Instruction *Between, const Instruction *To, DominatorTree *DT)
Assuming To can be reached from both From and Between, does Between lie on every path from From to To...
Definition: GVN.cpp:1140
static cl::opt< bool > GVNEnableLoadPRE("enable-load-pre", cl::init(true))
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
IRTranslator LLVM IR MI
This defines the Use class.
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
Definition: InlineInfo.cpp:109
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:512
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
#define P(N)
ppc ctr loops PowerPC CTR Loops Verify
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:77
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:424
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
iterator begin() const
Definition: ArrayRef.h:153
This represents the llvm.assume intrinsic.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
const BasicBlock * getEnd() const
Definition: Dominators.h:112
const BasicBlock * getStart() const
Definition: Dominators.h:108
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:367
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:675
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Value * getRHS() const
Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
Value * getLHS() const
Conditional or Unconditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1410
unsigned arg_size() const
Definition: InstrTypes.h:1408
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:747
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1104
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:760
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:765
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:768
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:773
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:909
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:226
iterator end()
Definition: DenseMap.h:84
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
void getDescendants(NodeT *R, SmallVectorImpl< NodeT * > &Result) const
Get all nodes dominated by R, including R itself.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Class representing an expression and its matching format.
This instruction extracts a struct member or array element value from an aggregate value.
unsigned getNumIndices() const
iterator_range< idx_iterator > indices() const
idx_iterator idx_begin() const
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
Represents calls to the gc.relocate intrinsic.
This class holds the mapping between values and value numbers.
Definition: GVN.h:151
uint32_t lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Pred, Value *LHS, Value *RHS)
Returns the value number of the given comparison, assigning it a new number if it did not have one be...
Definition: GVN.cpp:693
uint32_t getNextUnusedValueNumber()
Definition: GVN.h:213
uint32_t lookupOrAdd(Value *V)
lookup_or_add - Returns the value number for the specified value, assigning it a new number if it did...
Definition: GVN.cpp:601
uint32_t lookup(Value *V, bool Verify=true) const
Returns the value number of the specified value.
Definition: GVN.cpp:680
void setAliasAnalysis(AAResults *A)
Definition: GVN.h:209
void clear()
Remove all entries from the ValueTable.
Definition: GVN.cpp:701
bool exists(Value *V) const
Returns true if a value number exists for the specified value.
Definition: GVN.cpp:595
uint32_t phiTranslate(const BasicBlock *BB, const BasicBlock *PhiBlock, uint32_t Num, GVNPass &Gvn)
Wrap phiTranslateImpl to provide caching functionality.
Definition: GVN.cpp:2276
void setMemDep(MemoryDependenceResults *M)
Definition: GVN.h:211
void erase(Value *v)
Remove a value from the value numbering.
Definition: GVN.cpp:713
void add(Value *V, uint32_t num)
add - Insert a value into the table with a specified value number.
Definition: GVN.cpp:464
void setDomTree(DominatorTree *D)
Definition: GVN.h:212
void eraseTranslateCacheEntry(uint32_t Num, const BasicBlock &CurrBlock)
Erase stale entry from phiTranslate cache so phiTranslate can be computed again.
Definition: GVN.cpp:2379
void verifyRemoved(const Value *) const
verifyRemoved - Verify that the value is removed from all internal data structures.
Definition: GVN.cpp:723
The core GVN pass object.
Definition: GVN.h:117
friend class gvn::GVNLegacyPass
Definition: GVN.h:218
bool isPREEnabled() const
Definition: GVN.cpp:795
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: GVN.cpp:816
AAResults * getAliasAnalysis() const
Definition: GVN.h:139
bool isLoadPREEnabled() const
Definition: GVN.cpp:799
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: GVN.cpp:843
DominatorTree & getDominatorTree() const
Definition: GVN.h:138
bool isLoadInLoopPREEnabled() const
Definition: GVN.cpp:803
bool isLoadPRESplitBackedgeEnabled() const
Definition: GVN.cpp:807
void markInstructionForDeletion(Instruction *I)
This removes the specified instruction from our various maps and marks it for deletion.
Definition: GVN.h:133
bool isMemDepEnabled() const
Definition: GVN.cpp:812
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:915
Legacy wrapper pass to provide the GlobalsAAResult object.
This class allows to keep track on instructions with implicit control flow.
bool isDominatedByICFIFromSameBlock(const Instruction *Insn)
Returns true if the first ICFI of Insn's block exists and dominates Insn.
bool hasICF(const BasicBlock *BB)
Returns true if at least one instruction from the given basic block has implicit control flow.
void clear()
Invalidates all information from this tracking.
void removeUsersOf(const Instruction *Inst)
Notifies this tracking that we are going to replace all uses of Inst.
void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB)
Notifies this tracking that we are going to insert a new instruction Inst to the basic block BB.
void removeInstruction(const Instruction *Inst)
Notifies this tracking that we are going to remove the instruction Inst It makes all necessary update...
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
const Instruction * getPrevNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the previous non-debug instruction in the same basic block as 'this',...
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
Definition: Instruction.h:363
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:824
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:277
void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs=std::nullopt)
Drop all unknown metadata except for debug locations.
Definition: Metadata.cpp:1596
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:174
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:71
iterator find(const KeyT &Key)
Definition: MapVector.h:167
size_type size() const
Definition: MapVector.h:60
A memory dependence query can return one of three different answers.
bool isClobber() const
Tests if this MemDepResult represents a query that is an instruction clobber dependency.
bool isNonLocal() const
Tests if this MemDepResult represents a query that is transparent to the start of the block,...
bool isDef() const
Tests if this MemDepResult represents a query that is an instruction definition dependency.
bool isLocal() const
Tests if this MemDepResult represents a valid local query (Clobber/Def).
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
This is the common base class for memset/memcpy/memmove.
An analysis that produces MemoryDependenceResults for a function.
Provides a lazy, caching interface for making common memory aliasing information queries,...
std::vector< NonLocalDepEntry > NonLocalDepInfo
void invalidateCachedPredecessors()
Clears the PredIteratorCache info.
void invalidateCachedPointerInfo(Value *Ptr)
Invalidates cached information about the specified pointer, because it may be too conservative in mem...
std::optional< int32_t > getClobberOffset(LoadInst *DepInst) const
Return the clobber offset to dependent instruction.
void removeInstruction(Instruction *InstToRemove)
Removes an instruction from the dependence analysis, updating the dependence of instructions that pre...
MemDepResult getDependency(Instruction *QueryInst)
Returns the instruction on which a memory operation depends.
const NonLocalDepInfo & getNonLocalCallDependency(CallBase *QueryCall)
Perform a full dependency query for the specified call, returning the set of blocks that the value is...
void getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl< NonLocalDepResult > &Result)
Perform a full dependency query for an access to the QueryInst's specified memory location,...
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance.
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
const Value * Ptr
The address of the start of the location.
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:924
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
MemoryUseOrDef * createMemoryAccessBefore(Instruction *I, MemoryAccess *Definition, MemoryUseOrDef *InsertPt)
Create a MemoryAccess in MemorySSA before an existing MemoryAccess.
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block.
void insertUse(MemoryUse *Use, bool RenameUses=false)
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:981
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:697
const AccessList * getBlockAccesses(const BasicBlock *BB) const
Return the list of MemoryAccess's for a given basic block.
Definition: MemorySSA.h:755
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1905
Class that has the common methods + fields of memory uses/defs.
Definition: MemorySSA.h:249
This is an entry in the NonLocalDepInfo cache.
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PHITransAddr - An address value which tracks and handles phi translation.
Definition: PHITransAddr.h:35
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:40
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Definition: SSAUpdater.cpp:53
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
Definition: SSAUpdater.cpp:98
bool HasValueForBlock(BasicBlock *BB) const
Return true if the SSAUpdater already has a value for the specified block.
Definition: SSAUpdater.cpp:62
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Definition: SSAUpdater.cpp:70
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
const Value * getCondition() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
bool empty() const
Definition: SmallSet.h:159
bool erase(const T &V)
Definition: SmallSet.h:207
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
SmallVector & operator=(const SmallVector &RHS)
Definition: SmallVector.h:1254
An instruction for storing to memory.
Definition: Instructions.h:290
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Multiway switch.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt8Ty(LLVMContext &C)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:258
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1833
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool canBeFreed() const
Return true if the memory object referred to by V can by freed in the scope for which the SSA value d...
Definition: Value.cpp:790
void deleteValue()
Delete a pointer to a generic Value.
Definition: Value.cpp:110
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Represents an op.with.overflow intrinsic.
An efficient, type-erasing, non-owning reference to a callable.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: GVN.cpp:3363
GVNLegacyPass(bool NoMemDepAnalysis=!GVNEnableMemDep)
Definition: GVN.cpp:3340
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Definition: GVN.cpp:3345
An opaque object representing a hash code.
Definition: Hashing.h:75
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
Value * getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, Instruction *InsertPt, const DataLayout &DL)
If analyzeLoadFromClobberingStore/Load returned an offset, this function can be used to actually perf...
Definition: VNCoercion.cpp:335
int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, StoreInst *DepSI, const DataLayout &DL)
This function determines whether a value for the pointer LoadPtr can be extracted from the store at D...
Definition: VNCoercion.cpp:211
Value * getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, Type *LoadTy, Instruction *InsertPt, const DataLayout &DL)
If analyzeLoadFromClobberingMemInst returned an offset, this function can be used to actually perform...
Definition: VNCoercion.cpp:360
int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, const DataLayout &DL)
This function determines whether a value for the pointer LoadPtr can be extracted from the load at De...
Definition: VNCoercion.cpp:232
int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, MemIntrinsic *DepMI, const DataLayout &DL)
This function determines whether a value for the pointer LoadPtr can be extracted from the memory int...
Definition: VNCoercion.cpp:246
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, const DataLayout &DL)
Return true if CoerceAvailableValueToLoadType would succeed if it was called.
Definition: VNCoercion.cpp:18
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
A private "module" namespace for types and utilities used by GVN.
Definition: GVN.h:59
constexpr double e
Definition: MathExtras.h:47
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
hash_code hash_value(const FixedPointSemantics &Val)
Definition: APFixedPoint.h:127
Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
unsigned replaceDominatedUsesWithIf(Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Edge, function_ref< bool(const Use &U, const Value *To)> ShouldReplace)
Replace each use of 'From' with 'To' if that use is dominated by the given edge and the callback Shou...
Definition: Local.cpp:3537
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1678
auto successors(const MachineBasicBlock *BB)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2098
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
bool isAssumeWithEmptyBundle(const AssumeInst &Assume)
Return true iff the operand bundles of the provided llvm.assume doesn't contain any valuable informat...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void initializeGVNLegacyPassPass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
bool canReplacePointersInUseIfEqual(const Use &U, const Value *To, const DataLayout &DL)
Definition: Loads.cpp:774
bool canReplacePointersIfEqual(const Value *From, const Value *To, const DataLayout &DL)
Returns true if a pointer value From can be replaced with another pointer value \To if they are deeme...
Definition: Loads.cpp:786
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void patchReplacementInstruction(Instruction *I, Value *Repl)
Patch the replacement so that it is not more restrictive than the value being replaced.
Definition: Local.cpp:3453
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Edge)
Replace each use of 'From' with 'To' if that use is dominated by the given edge.
Definition: Local.cpp:3519
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
@ Global
Append to llvm.global_dtors.
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3379
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:84
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
FunctionPass * createGVNPass(bool NoMemDepAnalysis=false)
Create a legacy GVN pass.
Definition: GVN.cpp:3396
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
iterator_range< df_iterator< T > > depth_first(const T &G)
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:593
bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1485
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:471
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition: CFG.cpp:281
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
Option class for critical edge splitting.
static GVNPass::Expression getTombstoneKey()
Definition: GVN.cpp:171
static bool isEqual(const GVNPass::Expression &LHS, const GVNPass::Expression &RHS)
Definition: GVN.cpp:179
static unsigned getHashValue(const GVNPass::Expression &e)
Definition: GVN.cpp:173
static GVNPass::Expression getEmptyKey()
Definition: GVN.cpp:170
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52
A set of parameters to control various transforms performed by GVN pass.
Definition: GVN.h:74
std::optional< bool > AllowLoadPRESplitBackedge
Definition: GVN.h:78
std::optional< bool > AllowPRE
Definition: GVN.h:75
std::optional< bool > AllowLoadInLoopPRE
Definition: GVN.h:77
std::optional< bool > AllowMemDep
Definition: GVN.h:79
std::optional< bool > AllowLoadPRE
Definition: GVN.h:76
SmallVector< uint32_t, 4 > varargs
Definition: GVN.cpp:144
bool operator==(const Expression &other) const
Definition: GVN.cpp:148
friend hash_code hash_value(const Expression &Value)
Definition: GVN.cpp:160
Expression(uint32_t o=~2U)
Definition: GVN.cpp:146
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:69
Represents an AvailableValue which can be rematerialized at the end of the associated BasicBlock.
Definition: GVN.cpp:288
static AvailableValueInBlock get(BasicBlock *BB, Value *V, unsigned Offset=0)
Definition: GVN.cpp:302
static AvailableValueInBlock getUndef(BasicBlock *BB)
Definition: GVN.cpp:307
static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV)
Definition: GVN.cpp:295
Value * MaterializeAdjustedValue(LoadInst *Load, GVNPass &gvn) const
Emit code at the end of this block to adjust the value defined here to the specified type.
Definition: GVN.cpp:318
AvailableValue AV
AV - The actual available value.
Definition: GVN.cpp:293
static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel, Value *V1, Value *V2)
Definition: GVN.cpp:311
BasicBlock * BB
BB - The basic block in question.
Definition: GVN.cpp:290
Represents a particular available value that we know how to materialize.
Definition: GVN.cpp:191
unsigned Offset
Offset - The byte offset in Val that is interesting for the load query.
Definition: GVN.cpp:208
bool isSimpleValue() const
Definition: GVN.cpp:254
static AvailableValue getSelect(SelectInst *Sel, Value *V1, Value *V2)
Definition: GVN.cpp:244
bool isCoercedLoadValue() const
Definition: GVN.cpp:255
static AvailableValue get(Value *V, unsigned Offset=0)
Definition: GVN.cpp:212
ValType Kind
Kind of the live-out value.
Definition: GVN.cpp:205
LoadInst * getCoercedLoadValue() const
Definition: GVN.cpp:265
static AvailableValue getLoad(LoadInst *Load, unsigned Offset=0)
Definition: GVN.cpp:228
static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset=0)
Definition: GVN.cpp:220
bool isUndefValue() const
Definition: GVN.cpp:257
bool isSelectValue() const
Definition: GVN.cpp:258
Value * Val
Val - The value that is live out of the block.
Definition: GVN.cpp:203
Value * V1
V1, V2 - The dominating non-clobbered values of SelectVal.
Definition: GVN.cpp:210
Value * MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt, GVNPass &gvn) const
Emit code at the specified insertion point to adjust the value defined here to the specified type.
Definition: GVN.cpp:1070
static AvailableValue getUndef()
Definition: GVN.cpp:236
SelectInst * getSelectValue() const
Definition: GVN.cpp:275
Value * getSimpleValue() const
Definition: GVN.cpp:260
bool isMemIntrinValue() const
Definition: GVN.cpp:256
MemIntrinsic * getMemIntrinValue() const
Definition: GVN.cpp:270