EarlyCSE.cpp
1//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs a simple dominator tree walk that eliminates trivially
10// redundant instructions.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/ADT/Hashing.h"
17#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/Dominators.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/InstrTypes.h"
35#include "llvm/IR/Instruction.h"
38#include "llvm/IR/LLVMContext.h"
39#include "llvm/IR/PassManager.h"
41#include "llvm/IR/Type.h"
42#include "llvm/IR/Value.h"
44#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
55#include <cassert>
56#include <deque>
57#include <memory>
58#include <utility>
59
60using namespace llvm;
61using namespace llvm::PatternMatch;
62
63#define DEBUG_TYPE "early-cse"
64
65STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
66STATISTIC(NumCSE, "Number of instructions CSE'd");
67STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
68STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
69STATISTIC(NumCSECall, "Number of call instructions CSE'd");
70STATISTIC(NumDSE, "Number of trivial dead stores removed");
71
72DEBUG_COUNTER(CSECounter, "early-cse",
73 "Controls which instructions are removed");
74
76 "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
77 cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
78 "for faster compile. Caps the MemorySSA clobbering calls."));
79
81 "earlycse-debug-hash", cl::init(false), cl::Hidden,
82 cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
83 "function is well-behaved w.r.t. its isEqual predicate"));
84
85//===----------------------------------------------------------------------===//
86// SimpleValue
87//===----------------------------------------------------------------------===//
88
89namespace {
90
91/// Struct representing the available values in the scoped hash table.
92struct SimpleValue {
93 Instruction *Inst;
94
95 SimpleValue(Instruction *I) : Inst(I) {
96 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
97 }
98
99 bool isSentinel() const {
100 return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
101 Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
102 }
103
104 static bool canHandle(Instruction *Inst) {
105 // This can only handle non-void readnone functions.
106 // Also handled are constrained intrinsics that look like the types
107 // of instruction handled below (UnaryOperator, etc.).
108 if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
109 if (Function *F = CI->getCalledFunction()) {
110 switch ((Intrinsic::ID)F->getIntrinsicID()) {
111 case Intrinsic::experimental_constrained_fadd:
112 case Intrinsic::experimental_constrained_fsub:
113 case Intrinsic::experimental_constrained_fmul:
114 case Intrinsic::experimental_constrained_fdiv:
115 case Intrinsic::experimental_constrained_frem:
116 case Intrinsic::experimental_constrained_fptosi:
117 case Intrinsic::experimental_constrained_sitofp:
118 case Intrinsic::experimental_constrained_fptoui:
119 case Intrinsic::experimental_constrained_uitofp:
120 case Intrinsic::experimental_constrained_fcmp:
121 case Intrinsic::experimental_constrained_fcmps: {
122 auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
123 if (CFP->getExceptionBehavior() &&
124 CFP->getExceptionBehavior() == fp::ebStrict)
125 return false;
126 // Since we CSE across function calls we must not allow
127 // the rounding mode to change.
128 if (CFP->getRoundingMode() &&
129 CFP->getRoundingMode() == RoundingMode::Dynamic)
130 return false;
131 return true;
132 }
133 }
134 }
135 return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() &&
136 // FIXME: Currently the calls which may access the thread id may
137 // be considered as not accessing the memory. But this is
138 // problematic for coroutines, since coroutines may resume in a
139 // different thread. So we disable the optimization here for the
140 // correctness. However, it may block many other correct
141 // optimizations. Revert this one when we detect the memory
142 // accessing kind more precisely.
143 !CI->getFunction()->isPresplitCoroutine();
144 }
145 return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
146 isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
147 isa<CmpInst>(Inst) || isa<SelectInst>(Inst) ||
148 isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
149 isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
150 isa<InsertValueInst>(Inst) || isa<FreezeInst>(Inst);
151 }
152};
153
154} // end anonymous namespace
155
156namespace llvm {
157
158template <> struct DenseMapInfo<SimpleValue> {
159 static inline SimpleValue getEmptyKey() {
160 return DenseMapInfo<Instruction *>::getEmptyKey();
161 }
162
163 static inline SimpleValue getTombstoneKey() {
164 return DenseMapInfo<Instruction *>::getTombstoneKey();
165 }
166
167 static unsigned getHashValue(SimpleValue Val);
168 static bool isEqual(SimpleValue LHS, SimpleValue RHS);
169};
170
171} // end namespace llvm
172
173/// Match a 'select' including an optional 'not' of the condition.
174static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
175 Value *&B,
176 SelectPatternFlavor &Flavor) {
177 // Return false if V is not even a select.
178 if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
179 return false;
180
181 // Look through a 'not' of the condition operand by swapping A/B.
182 Value *CondNot;
183 if (match(Cond, m_Not(m_Value(CondNot)))) {
184 Cond = CondNot;
185 std::swap(A, B);
186 }
187
188 // Match canonical forms of min/max. We are not using ValueTracking's
189 // more powerful matchSelectPattern() because it may rely on instruction flags
190 // such as "nsw". That would be incompatible with the current hashing
191 // mechanism that may remove flags to increase the likelihood of CSE.
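  // For illustration (hypothetical IR, not from this file), both of these
  // forms are treated as the same smin despite the commuted compare:
  //   %c1 = icmp slt i32 %x, %y             %c2 = icmp sgt i32 %y, %x
  //   %m1 = select i1 %c1, i32 %x, i32 %y   %m2 = select i1 %c2, i32 %x, i32 %y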
192
193 Flavor = SPF_UNKNOWN;
194 CmpInst::Predicate Pred;
195
196 if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
197 // Check for commuted variants of min/max by swapping predicate.
198 // If we do not match the standard or commuted patterns, this is not a
199 // recognized form of min/max, but it is still a select, so return true.
200 if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
201 return true;
202 Pred = ICmpInst::getSwappedPredicate(Pred);
203 }
204
205 switch (Pred) {
206 case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
207 case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
208 case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
209 case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
210 // Non-strict inequalities.
211 case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
212 case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
213 case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
214 case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
215 default: break;
216 }
217
218 return true;
219}
220
221static unsigned getHashValueImpl(SimpleValue Val) {
222 Instruction *Inst = Val.Inst;
223 // Hash in all of the operands as pointers.
224 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
225 Value *LHS = BinOp->getOperand(0);
226 Value *RHS = BinOp->getOperand(1);
227 if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
228 std::swap(LHS, RHS);
229
230 return hash_combine(BinOp->getOpcode(), LHS, RHS);
231 }
232
233 if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
234 // Compares can be commuted by swapping the comparands and
235 // updating the predicate. Choose the form that has the
236 // comparands in sorted order, or in the case of a tie, the
237 // one with the lower predicate.
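    // For example (illustrative IR), 'icmp sgt i32 %a, %b' and
    // 'icmp slt i32 %b, %a' normalize to the same form and hash identically.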
238 Value *LHS = CI->getOperand(0);
239 Value *RHS = CI->getOperand(1);
240 CmpInst::Predicate Pred = CI->getPredicate();
241 CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
242 if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
243 std::swap(LHS, RHS);
244 Pred = SwappedPred;
245 }
246 return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
247 }
248
249 // Hash general selects to allow matching commuted true/false operands.
250 SelectPatternFlavor SPF;
251 Value *Cond, *A, *B;
252 if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
253 // Hash min/max (cmp + select) to allow for commuted operands.
254 // Min/max may also have non-canonical compare predicate (e.g., the compare for
255 // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
256 // compare.
257 // TODO: We should also detect FP min/max.
258 if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
259 SPF == SPF_UMIN || SPF == SPF_UMAX) {
260 if (A > B)
261 std::swap(A, B);
262 return hash_combine(Inst->getOpcode(), SPF, A, B);
263 }
264
265 // Hash general selects to allow matching commuted true/false operands.
266
267 // If we do not have a compare as the condition, just hash in the condition.
268 CmpInst::Predicate Pred;
269 Value *X, *Y;
270 if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
271 return hash_combine(Inst->getOpcode(), Cond, A, B);
272
273 // Similar to cmp normalization (above) - canonicalize the predicate value:
274 // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
275 if (CmpInst::getInversePredicate(Pred) < Pred) {
276 Pred = CmpInst::getInversePredicate(Pred);
277 std::swap(A, B);
278 }
279 return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
280 }
281
282 if (CastInst *CI = dyn_cast<CastInst>(Inst))
283 return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
284
285 if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
286 return hash_combine(FI->getOpcode(), FI->getOperand(0));
287
288 if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
289 return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
290 hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
291
292 if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
293 return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
294 IVI->getOperand(1),
295 hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
296
297 assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
298 isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
299 isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
300 isa<FreezeInst>(Inst)) &&
301 "Invalid/unknown instruction");
302
303 // Handle intrinsics with commutative operands.
304 // TODO: Extend this to handle intrinsics with >2 operands where the 1st
305 // 2 operands are commutative.
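  // For example (illustrative IR), 'call i32 @llvm.smax.i32(i32 %a, i32 %b)'
  // and 'call i32 @llvm.smax.i32(i32 %b, i32 %a)' hash to the same value.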
306 auto *II = dyn_cast<IntrinsicInst>(Inst);
307 if (II && II->isCommutative() && II->arg_size() == 2) {
308 Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
309 if (LHS > RHS)
310 std::swap(LHS, RHS);
311 return hash_combine(II->getOpcode(), LHS, RHS);
312 }
313
314 // gc.relocate is 'special' call: its second and third operands are
315 // not real values, but indices into statepoint's argument list.
316 // Get values they point to.
317 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
318 return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
319 GCR->getBasePtr(), GCR->getDerivedPtr());
320
321 // Don't CSE convergent calls in different basic blocks, because they
322 // implicitly depend on the set of threads that is currently executing.
323 if (CallInst *CI = dyn_cast<CallInst>(Inst); CI && CI->isConvergent()) {
324 return hash_combine(
325 Inst->getOpcode(), Inst->getParent(),
326 hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
327 }
328
329 // Mix in the opcode.
330 return hash_combine(
331 Inst->getOpcode(),
332 hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
333}
334
335unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
336#ifndef NDEBUG
337 // If -earlycse-debug-hash was specified, return a constant -- this
338 // will force all hashing to collide, so we'll exhaustively search
339 // the table for a match, and the assertion in isEqual will fire if
340 // there's a bug causing equal keys to hash differently.
341 if (EarlyCSEDebugHash)
342 return 0;
343#endif
344 return getHashValueImpl(Val);
345}
346
347static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
348 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
349
350 if (LHS.isSentinel() || RHS.isSentinel())
351 return LHSI == RHSI;
352
353 if (LHSI->getOpcode() != RHSI->getOpcode())
354 return false;
355 if (LHSI->isIdenticalToWhenDefined(RHSI)) {
356 // Convergent calls implicitly depend on the set of threads that is
357 // currently executing, so conservatively return false if they are in
358 // different basic blocks.
359 if (CallInst *CI = dyn_cast<CallInst>(LHSI);
360 CI && CI->isConvergent() && LHSI->getParent() != RHSI->getParent())
361 return false;
362
363 return true;
364 }
365
366 // If we're not strictly identical, we still might be a commutable instruction
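  // For example (illustrative IR), 'add i32 %a, %b' and 'add i32 %b, %a' are
  // not isIdenticalToWhenDefined, but they compare equal here.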
367 if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
368 if (!LHSBinOp->isCommutative())
369 return false;
370
371 assert(isa<BinaryOperator>(RHSI) &&
372 "same opcode, but different instruction type?");
373 BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
374
375 // Commuted equality
376 return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
377 LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
378 }
379 if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
380 assert(isa<CmpInst>(RHSI) &&
381 "same opcode, but different instruction type?");
382 CmpInst *RHSCmp = cast<CmpInst>(RHSI);
383 // Commuted equality
384 return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
385 LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
386 LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
387 }
388
389 // TODO: Extend this for >2 args by matching the trailing N-2 args.
390 auto *LII = dyn_cast<IntrinsicInst>(LHSI);
391 auto *RII = dyn_cast<IntrinsicInst>(RHSI);
392 if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
393 LII->isCommutative() && LII->arg_size() == 2) {
394 return LII->getArgOperand(0) == RII->getArgOperand(1) &&
395 LII->getArgOperand(1) == RII->getArgOperand(0);
396 }
397
398 // See comment above in `getHashValue()`.
399 if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
400 if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
401 return GCR1->getOperand(0) == GCR2->getOperand(0) &&
402 GCR1->getBasePtr() == GCR2->getBasePtr() &&
403 GCR1->getDerivedPtr() == GCR2->getDerivedPtr();
404
405 // Min/max can occur with commuted operands, non-canonical predicates,
406 // and/or non-canonical operands.
407 // Selects can be non-trivially equivalent via inverted conditions and swaps.
408 SelectPatternFlavor LSPF, RSPF;
409 Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
410 if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
411 matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
412 if (LSPF == RSPF) {
413 // TODO: We should also detect FP min/max.
414 if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
415 LSPF == SPF_UMIN || LSPF == SPF_UMAX)
416 return ((LHSA == RHSA && LHSB == RHSB) ||
417 (LHSA == RHSB && LHSB == RHSA));
418
419 // select Cond, A, B <--> select not(Cond), B, A
420 if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
421 return true;
422 }
423
424 // If the true/false operands are swapped and the conditions are compares
425 // with inverted predicates, the selects are equal:
426 // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
427 //
428 // This also handles patterns with a double-negation in the sense of not +
429 // inverse, because we looked through a 'not' in the matching function and
430 // swapped A/B:
431 // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
432 //
433 // This intentionally does NOT handle patterns with a double-negation in
434 // the sense of not + not, because doing so could result in values
435 // comparing
436 // as equal that hash differently in the min/max cases like:
437 // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
438 // ^ hashes as min ^ would not hash as min
439 // In the context of the EarlyCSE pass, however, such cases never reach
440 // this code, as we simplify the double-negation before hashing the second
441 // select (and so still succeed at CSEing them).
442 if (LHSA == RHSB && LHSB == RHSA) {
443 CmpInst::Predicate PredL, PredR;
444 Value *X, *Y;
445 if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
446 match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
447 CmpInst::getInversePredicate(PredL) == PredR)
448 return true;
449 }
450 }
451
452 return false;
453}
454
455bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
456 // These comparisons are nontrivial, so assert that equality implies
457 // hash equality (DenseMap demands this as an invariant).
458 bool Result = isEqualImpl(LHS, RHS);
459 assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
460 getHashValueImpl(LHS) == getHashValueImpl(RHS));
461 return Result;
462}
463
464//===----------------------------------------------------------------------===//
465// CallValue
466//===----------------------------------------------------------------------===//
467
468namespace {
469
470/// Struct representing the available call values in the scoped hash
471/// table.
472struct CallValue {
473 Instruction *Inst;
474
475 CallValue(Instruction *I) : Inst(I) {
476 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
477 }
478
479 bool isSentinel() const {
480 return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
481 Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
482 }
483
484 static bool canHandle(Instruction *Inst) {
485 // Don't value number anything that returns void.
486 if (Inst->getType()->isVoidTy())
487 return false;
488
489 CallInst *CI = dyn_cast<CallInst>(Inst);
490 if (!CI || !CI->onlyReadsMemory() ||
491 // FIXME: Currently the calls which may access the thread id may
492 // be considered as not accessing the memory. But this is
493 // problematic for coroutines, since coroutines may resume in a
494 // different thread. So we disable the optimization here for the
495 // correctness. However, it may block many other correct
496 // optimizations. Revert this one when we detect the memory
497 // accessing kind more precisely.
498 CI->getFunction()->isPresplitCoroutine())
499 return false;
500 return true;
501 }
502};
503
504} // end anonymous namespace
505
506namespace llvm {
507
508template <> struct DenseMapInfo<CallValue> {
509 static inline CallValue getEmptyKey() {
510 return DenseMapInfo<Instruction *>::getEmptyKey();
511 }
512
513 static inline CallValue getTombstoneKey() {
514 return DenseMapInfo<Instruction *>::getTombstoneKey();
515 }
516
517 static unsigned getHashValue(CallValue Val);
518 static bool isEqual(CallValue LHS, CallValue RHS);
519};
520
521} // end namespace llvm
522
523unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
524 Instruction *Inst = Val.Inst;
525
526 // Hash all of the operands as pointers and mix in the opcode.
527 return hash_combine(
528 Inst->getOpcode(),
529 hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
530}
531
532bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
533 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
534 if (LHS.isSentinel() || RHS.isSentinel())
535 return LHSI == RHSI;
536
537 return LHSI->isIdenticalTo(RHSI);
538}
539
540//===----------------------------------------------------------------------===//
541// EarlyCSE implementation
542//===----------------------------------------------------------------------===//
543
544namespace {
545
546/// A simple and fast domtree-based CSE pass.
547///
548/// This pass does a simple depth-first walk over the dominator tree,
549/// eliminating trivially redundant instructions and using instsimplify to
550/// canonicalize things as it goes. It is intended to be fast and catch obvious
551/// cases so that instcombine and other passes are more effective. It is
552/// expected that a later pass of GVN will catch the interesting/hard cases.
553class EarlyCSE {
554public:
555 const TargetLibraryInfo &TLI;
556 const TargetTransformInfo &TTI;
557 DominatorTree &DT;
558 AssumptionCache &AC;
559 const SimplifyQuery SQ;
560 MemorySSA *MSSA;
561 std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
562
563 using AllocatorTy =
564 RecyclingAllocator<BumpPtrAllocator,
565 ScopedHashTableVal<SimpleValue, Value *>>;
566 using ScopedHTType =
567 ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
568 AllocatorTy>;
569
570 /// A scoped hash table of the current values of all of our simple
571 /// scalar expressions.
572 ///
573 /// As we walk down the domtree, we look to see if instructions are in this:
574 /// if so, we replace them with what we find, otherwise we insert them so
575 /// that dominated values can succeed in their lookup.
576 ScopedHTType AvailableValues;
577
578 /// A scoped hash table of the current values of previously encountered
579 /// memory locations.
580 ///
581 /// This allows us to get efficient access to dominating loads or stores when
582 /// we have a fully redundant load. In addition to the most recent load, we
583 /// keep track of a generation count of the read, which is compared against
584 /// the current generation count. The current generation count is incremented
585 /// after every possibly writing memory operation, which ensures that we only
586 /// CSE loads with other loads that have no intervening store. Ordering
587 /// events (such as fences or atomic instructions) increment the generation
588 /// count as well; essentially, we model these as writes to all possible
589 /// locations. Note that atomic and/or volatile loads and stores can be
590 /// present the table; it is the responsibility of the consumer to inspect
591 /// the atomicity/volatility if needed.
592 struct LoadValue {
593 Instruction *DefInst = nullptr;
594 unsigned Generation = 0;
595 int MatchingId = -1;
596 bool IsAtomic = false;
597 bool IsLoad = false;
598
599 LoadValue() = default;
600 LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
601 bool IsAtomic, bool IsLoad)
602 : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
603 IsAtomic(IsAtomic), IsLoad(IsLoad) {}
604 };
605
606 using LoadMapAllocator =
607 RecyclingAllocator<BumpPtrAllocator,
608 ScopedHashTableVal<Value *, LoadValue>>;
609 using LoadHTType =
610 ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
611 LoadMapAllocator>;
612
613 LoadHTType AvailableLoads;
614
615 // A scoped hash table mapping memory locations (represented as typed
616 // addresses) to generation numbers at which that memory location became
617 // (henceforth indefinitely) invariant.
618 using InvariantMapAllocator =
619 RecyclingAllocator<BumpPtrAllocator,
620 ScopedHashTableVal<MemoryLocation, unsigned>>;
621 using InvariantHTType =
622 ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
623 InvariantMapAllocator>;
624 InvariantHTType AvailableInvariants;
625
626 /// A scoped hash table of the current values of read-only call
627 /// values.
628 ///
629 /// It uses the same generation count as loads.
630 using CallHTType =
631 ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
632 CallHTType AvailableCalls;
633
634 /// This is the current generation of the memory value.
635 unsigned CurrentGeneration = 0;
636
637 /// Set up the EarlyCSE runner for a particular function.
638 EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
639 const TargetTransformInfo &TTI, DominatorTree &DT,
640 AssumptionCache &AC, MemorySSA *MSSA)
641 : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
642 MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
643
644 bool run();
645
646private:
647 unsigned ClobberCounter = 0;
648 // Almost a POD, but needs to call the constructors for the scoped hash
649 // tables so that a new scope gets pushed on. These are RAII so that the
650 // scope gets popped when the NodeScope is destroyed.
651 class NodeScope {
652 public:
653 NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
654 InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
655 : Scope(AvailableValues), LoadScope(AvailableLoads),
656 InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
657 NodeScope(const NodeScope &) = delete;
658 NodeScope &operator=(const NodeScope &) = delete;
659
660 private:
661 ScopedHTType::ScopeTy Scope;
662 LoadHTType::ScopeTy LoadScope;
663 InvariantHTType::ScopeTy InvariantScope;
664 CallHTType::ScopeTy CallScope;
665 };
666
667 // Contains all the needed information to create a stack for doing a depth
668 // first traversal of the tree. This includes scopes for values, loads, and
669 // calls as well as the generation. There is a child iterator so that the
670 // children do not need to be stored separately.
671 class StackNode {
672 public:
673 StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
674 InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
675 unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
676 DomTreeNode::const_iterator end)
677 : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
678 EndIter(end),
679 Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
680 AvailableCalls)
681 {}
682 StackNode(const StackNode &) = delete;
683 StackNode &operator=(const StackNode &) = delete;
684
685 // Accessors.
686 unsigned currentGeneration() const { return CurrentGeneration; }
687 unsigned childGeneration() const { return ChildGeneration; }
688 void childGeneration(unsigned generation) { ChildGeneration = generation; }
689 DomTreeNode *node() { return Node; }
690 DomTreeNode::const_iterator childIter() const { return ChildIter; }
691
692 DomTreeNode *nextChild() {
693 DomTreeNode *child = *ChildIter;
694 ++ChildIter;
695 return child;
696 }
697
698 DomTreeNode::const_iterator end() const { return EndIter; }
699 bool isProcessed() const { return Processed; }
700 void process() { Processed = true; }
701
702 private:
703 unsigned CurrentGeneration;
704 unsigned ChildGeneration;
705 DomTreeNode *Node;
706 DomTreeNode::const_iterator ChildIter;
707 DomTreeNode::const_iterator EndIter;
708 NodeScope Scopes;
709 bool Processed = false;
710 };
711
712 /// Wrapper class to handle memory instructions, including loads,
713 /// stores and intrinsic loads and stores defined by the target.
714 class ParseMemoryInst {
715 public:
716 ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
717 : Inst(Inst) {
718 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
719 IntrID = II->getIntrinsicID();
720 if (TTI.getTgtMemIntrinsic(II, Info))
721 return;
722 if (isHandledNonTargetIntrinsic(IntrID)) {
723 switch (IntrID) {
724 case Intrinsic::masked_load:
725 Info.PtrVal = Inst->getOperand(0);
726 Info.MatchingId = Intrinsic::masked_load;
727 Info.ReadMem = true;
728 Info.WriteMem = false;
729 Info.IsVolatile = false;
730 break;
731 case Intrinsic::masked_store:
732 Info.PtrVal = Inst->getOperand(1);
733 // Use the ID of masked load as the "matching id". This will
734 // prevent matching non-masked loads/stores with masked ones
735 // (which could be done), but at the moment, the code here
736 // does not support matching intrinsics with non-intrinsics,
737 // so keep the MatchingIds specific to masked instructions
738 // for now (TODO).
739 Info.MatchingId = Intrinsic::masked_load;
740 Info.ReadMem = false;
741 Info.WriteMem = true;
742 Info.IsVolatile = false;
743 break;
744 }
745 }
746 }
747 }
748
749 Instruction *get() { return Inst; }
750 const Instruction *get() const { return Inst; }
751
752 bool isLoad() const {
753 if (IntrID != 0)
754 return Info.ReadMem;
755 return isa<LoadInst>(Inst);
756 }
757
758 bool isStore() const {
759 if (IntrID != 0)
760 return Info.WriteMem;
761 return isa<StoreInst>(Inst);
762 }
763
764 bool isAtomic() const {
765 if (IntrID != 0)
766 return Info.Ordering != AtomicOrdering::NotAtomic;
767 return Inst->isAtomic();
768 }
769
770 bool isUnordered() const {
771 if (IntrID != 0)
772 return Info.isUnordered();
773
774 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
775 return LI->isUnordered();
776 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
777 return SI->isUnordered();
778 }
779 // Conservative answer
780 return !Inst->isAtomic();
781 }
782
783 bool isVolatile() const {
784 if (IntrID != 0)
785 return Info.IsVolatile;
786
787 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
788 return LI->isVolatile();
789 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
790 return SI->isVolatile();
791 }
792 // Conservative answer
793 return true;
794 }
795
796 bool isInvariantLoad() const {
797 if (auto *LI = dyn_cast<LoadInst>(Inst))
798 return LI->hasMetadata(LLVMContext::MD_invariant_load);
799 return false;
800 }
801
802 bool isValid() const { return getPointerOperand() != nullptr; }
803
804 // For regular (non-intrinsic) loads/stores, this is set to -1. For
805 // intrinsic loads/stores, the id is retrieved from the corresponding
806 // field in the MemIntrinsicInfo structure. That field contains
807 // non-negative values only.
808 int getMatchingId() const {
809 if (IntrID != 0)
810 return Info.MatchingId;
811 return -1;
812 }
813
814 Value *getPointerOperand() const {
815 if (IntrID != 0)
816 return Info.PtrVal;
817 return getLoadStorePointerOperand(Inst);
818 }
819
820 Type *getValueType() const {
821 // TODO: handle target-specific intrinsics.
822 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
823 switch (II->getIntrinsicID()) {
824 case Intrinsic::masked_load:
825 return II->getType();
826 case Intrinsic::masked_store:
827 return II->getArgOperand(0)->getType();
828 default:
829 return nullptr;
830 }
831 }
832 return getLoadStoreType(Inst);
833 }
834
835 bool mayReadFromMemory() const {
836 if (IntrID != 0)
837 return Info.ReadMem;
838 return Inst->mayReadFromMemory();
839 }
840
841 bool mayWriteToMemory() const {
842 if (IntrID != 0)
843 return Info.WriteMem;
844 return Inst->mayWriteToMemory();
845 }
846
847 private:
848 Intrinsic::ID IntrID = 0;
849 MemIntrinsicInfo Info;
850 Instruction *Inst;
851 };
852
853 // This function is to prevent accidentally passing a non-target
854 // intrinsic ID to TargetTransformInfo.
855 static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
856 switch (ID) {
857 case Intrinsic::masked_load:
858 case Intrinsic::masked_store:
859 return true;
860 }
861 return false;
862 }
863 static bool isHandledNonTargetIntrinsic(const Value *V) {
864 if (auto *II = dyn_cast<IntrinsicInst>(V))
865 return isHandledNonTargetIntrinsic(II->getIntrinsicID());
866 return false;
867 }
868
869 bool processNode(DomTreeNode *Node);
870
871 bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
872 const BasicBlock *BB, const BasicBlock *Pred);
873
874 Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
875 unsigned CurrentGeneration);
876
877 bool overridingStores(const ParseMemoryInst &Earlier,
878 const ParseMemoryInst &Later);
879
880 Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
881 // TODO: We could insert relevant casts on type mismatch here.
882 if (auto *LI = dyn_cast<LoadInst>(Inst))
883 return LI->getType() == ExpectedType ? LI : nullptr;
884 if (auto *SI = dyn_cast<StoreInst>(Inst)) {
885 Value *V = SI->getValueOperand();
886 return V->getType() == ExpectedType ? V : nullptr;
887 }
888 assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
889 auto *II = cast<IntrinsicInst>(Inst);
890 if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
891 return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
892 return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
893 }
894
895 Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
896 Type *ExpectedType) const {
897 // TODO: We could insert relevant casts on type mismatch here.
898 switch (II->getIntrinsicID()) {
899 case Intrinsic::masked_load:
900 return II->getType() == ExpectedType ? II : nullptr;
901 case Intrinsic::masked_store: {
902 Value *V = II->getOperand(0);
903 return V->getType() == ExpectedType ? V : nullptr;
904 }
905 }
906 return nullptr;
907 }
908
909 /// Return true if the instruction is known to only operate on memory
910 /// provably invariant in the given "generation".
911 bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
912
913 bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
914 Instruction *EarlierInst, Instruction *LaterInst);
915
916 bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
917 const IntrinsicInst *Later) {
918 auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
919 // Is Mask0 a submask of Mask1?
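      // For example (illustrative), <i1 true, i1 false> is a submask of
      // <i1 true, i1 true>, but not the other way around.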
920 if (Mask0 == Mask1)
921 return true;
922 if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
923 return false;
924 auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
925 auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
926 if (!Vec0 || !Vec1)
927 return false;
928 if (Vec0->getType() != Vec1->getType())
929 return false;
930 for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
931 Constant *Elem0 = Vec0->getOperand(i);
932 Constant *Elem1 = Vec1->getOperand(i);
933 auto *Int0 = dyn_cast<ConstantInt>(Elem0);
934 if (Int0 && Int0->isZero())
935 continue;
936 auto *Int1 = dyn_cast<ConstantInt>(Elem1);
937 if (Int1 && !Int1->isZero())
938 continue;
939 if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
940 return false;
941 if (Elem0 == Elem1)
942 continue;
943 return false;
944 }
945 return true;
946 };
947 auto PtrOp = [](const IntrinsicInst *II) {
948 if (II->getIntrinsicID() == Intrinsic::masked_load)
949 return II->getOperand(0);
950 if (II->getIntrinsicID() == Intrinsic::masked_store)
951 return II->getOperand(1);
952 llvm_unreachable("Unexpected IntrinsicInst");
953 };
954 auto MaskOp = [](const IntrinsicInst *II) {
955 if (II->getIntrinsicID() == Intrinsic::masked_load)
956 return II->getOperand(2);
957 if (II->getIntrinsicID() == Intrinsic::masked_store)
958 return II->getOperand(3);
959 llvm_unreachable("Unexpected IntrinsicInst");
960 };
961 auto ThruOp = [](const IntrinsicInst *II) {
962 if (II->getIntrinsicID() == Intrinsic::masked_load)
963 return II->getOperand(3);
964 llvm_unreachable("Unexpected IntrinsicInst");
965 };
966
967 if (PtrOp(Earlier) != PtrOp(Later))
968 return false;
969
970 Intrinsic::ID IDE = Earlier->getIntrinsicID();
971 Intrinsic::ID IDL = Later->getIntrinsicID();
972 // We could really use specific intrinsic classes for masked loads
973 // and stores in IntrinsicInst.h.
974 if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
975 // Trying to replace later masked load with the earlier one.
976 // Check that the pointers are the same, and
977 // - masks and pass-throughs are the same, or
978 // - replacee's pass-through is "undef" and replacer's mask is a
979 // super-set of the replacee's mask.
980 if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
981 return true;
982 if (!isa<UndefValue>(ThruOp(Later)))
983 return false;
984 return IsSubmask(MaskOp(Later), MaskOp(Earlier));
985 }
986 if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
987 // Trying to replace a load of a stored value with the store's value.
988 // Check that the pointers are the same, and
989 // - load's mask is a subset of store's mask, and
990 // - load's pass-through is "undef".
991 if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
992 return false;
993 return isa<UndefValue>(ThruOp(Later));
994 }
995 if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
996 // Trying to remove a store of the loaded value.
997 // Check that the pointers are the same, and
998 // - store's mask is a subset of the load's mask.
999 return IsSubmask(MaskOp(Later), MaskOp(Earlier));
1000 }
1001 if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
1002 // Trying to remove a dead store (earlier).
1003 // Check that the pointers are the same,
1004 // - the to-be-removed store's mask is a subset of the other store's
1005 // mask.
1006 return IsSubmask(MaskOp(Earlier), MaskOp(Later));
1007 }
1008 return false;
1009 }
1010
1011 void removeMSSA(Instruction &Inst) {
1012 if (!MSSA)
1013 return;
1014 if (VerifyMemorySSA)
1015 MSSA->verifyMemorySSA();
1016 // Removing a store here can leave MemorySSA in an unoptimized state by
1017 // creating MemoryPhis that have identical arguments and by creating
1018 // MemoryUses whose defining access is not an actual clobber. The phi case
1019 // is handled by MemorySSA when passing OptimizePhis = true to
1020 // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
1021 // by MemorySSA's getClobberingMemoryAccess.
1022 MSSAUpdater->removeMemoryAccess(&Inst, true);
1023 }
1024};
1025
1026} // end anonymous namespace
1027
1028/// Determine if the memory referenced by LaterInst is from the same heap
1029/// version as EarlierInst.
1030/// This is currently called in two scenarios:
1031///
1032/// load p
1033/// ...
1034/// load p
1035///
1036/// and
1037///
1038/// x = load p
1039/// ...
1040/// store x, p
1041///
1042/// in both cases we want to verify that there are no possible writes to the
1043/// memory referenced by p between the earlier and later instruction.
1044bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
1045 unsigned LaterGeneration,
1046 Instruction *EarlierInst,
1047 Instruction *LaterInst) {
1048 // Check the simple memory generation tracking first.
1049 if (EarlierGeneration == LaterGeneration)
1050 return true;
1051
1052 if (!MSSA)
1053 return false;
1054
1055 // If MemorySSA has determined that one of EarlierInst or LaterInst does not
1056 // read/write memory, then we can safely return true here.
1057 // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
1058 // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
1059 // by also checking the MemorySSA MemoryAccess on the instruction. Initial
1060 // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
1061 // with the default optimization pipeline.
1062 auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
1063 if (!EarlierMA)
1064 return true;
1065 auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
1066 if (!LaterMA)
1067 return true;
1068
1069 // Since we know LaterDef dominates LaterInst and EarlierInst dominates
1070 // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
1071 // EarlierInst and LaterInst and neither can any other write that potentially
1072 // clobbers LaterInst.
1073 MemoryAccess *LaterDef;
1074 if (ClobberCounter < EarlyCSEMssaOptCap) {
1075 LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
1076 ClobberCounter++;
1077 } else
1078 LaterDef = LaterMA->getDefiningAccess();
1079
1080 return MSSA->dominates(LaterDef, EarlierMA);
1081}
1082
1083bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
1084 // A location loaded from with an invariant_load is assumed to *never* change
1085 // within the visible scope of the compilation.
1086 if (auto *LI = dyn_cast<LoadInst>(I))
1087 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1088 return true;
1089
1090 auto MemLocOpt = MemoryLocation::getOrNone(I);
1091 if (!MemLocOpt)
1092 // "target" intrinsic forms of loads aren't currently known to
1093 // MemoryLocation::get. TODO
1094 return false;
1095 MemoryLocation MemLoc = *MemLocOpt;
1096 if (!AvailableInvariants.count(MemLoc))
1097 return false;
1098
1099 // Is the generation at which this became invariant older than the
1100 // current one?
1101 return AvailableInvariants.lookup(MemLoc) <= GenAt;
1102}
1103
1104bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
1105 const BranchInst *BI, const BasicBlock *BB,
1106 const BasicBlock *Pred) {
1107 assert(BI->isConditional() && "Should be a conditional branch!");
1108 assert(BI->getCondition() == CondInst && "Wrong condition?");
1109 assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
1110 auto *TorF = (BI->getSuccessor(0) == BB)
1111 ? ConstantInt::getTrue(BB->getContext())
1112 : ConstantInt::getFalse(BB->getContext());
1113 auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
1114 Value *&RHS) {
1115 if (Opcode == Instruction::And &&
1116 match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1117 return true;
1118 else if (Opcode == Instruction::Or &&
1119 match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1120 return true;
1121 return false;
1122 };
1123 // If the condition is AND operation, we can propagate its operands into the
1124 // true branch. If it is OR operation, we can propagate them into the false
1125 // branch.
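  // For example (illustrative IR), if '%c = and i1 %a, %b' feeds the branch,
  // both %a and %b are known to be true in the true successor.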
1126 unsigned PropagateOpcode =
1127 (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
1128
1129 bool MadeChanges = false;
1130 SmallVector<Instruction *, 4> WorkList;
1131 SmallPtrSet<Instruction *, 4> Visited;
1132 WorkList.push_back(CondInst);
1133 while (!WorkList.empty()) {
1134 Instruction *Curr = WorkList.pop_back_val();
1135
1136 AvailableValues.insert(Curr, TorF);
1137 LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
1138 << Curr->getName() << "' as " << *TorF << " in "
1139 << BB->getName() << "\n");
1140 if (!DebugCounter::shouldExecute(CSECounter)) {
1141 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1142 } else {
1143 // Replace all dominated uses with the known value.
1144 if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
1145 BasicBlockEdge(Pred, BB))) {
1146 NumCSECVP += Count;
1147 MadeChanges = true;
1148 }
1149 }
1150
1151 Value *LHS, *RHS;
1152 if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
1153 for (auto *Op : { LHS, RHS })
1154 if (Instruction *OPI = dyn_cast<Instruction>(Op))
1155 if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
1156 WorkList.push_back(OPI);
1157 }
1158
1159 return MadeChanges;
1160}
1161
1162Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
1163 unsigned CurrentGeneration) {
1164 if (InVal.DefInst == nullptr)
1165 return nullptr;
1166 if (InVal.MatchingId != MemInst.getMatchingId())
1167 return nullptr;
1168 // We don't yet handle removing loads with ordering of any kind.
1169 if (MemInst.isVolatile() || !MemInst.isUnordered())
1170 return nullptr;
1171 // We can't replace an atomic load with one which isn't also atomic.
1172 if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
1173 return nullptr;
1174 // The value V returned from this function is used differently depending
1175 // on whether MemInst is a load or a store. If it's a load, we will replace
1176 // MemInst with V, if it's a store, we will check if V is the same as the
1177 // available value.
1178 bool MemInstMatching = !MemInst.isLoad();
1179 Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
1180 Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();
1181
1182 // For stores check the result values before checking memory generation
1183 // (otherwise isSameMemGeneration may crash).
1184 Value *Result = MemInst.isStore()
1185 ? getOrCreateResult(Matching, Other->getType())
1186 : nullptr;
1187 if (MemInst.isStore() && InVal.DefInst != Result)
1188 return nullptr;
1189
1190 // Deal with non-target memory intrinsics.
1191 bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
1192 bool OtherNTI = isHandledNonTargetIntrinsic(Other);
1193 if (OtherNTI != MatchingNTI)
1194 return nullptr;
1195 if (OtherNTI && MatchingNTI) {
1196 if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
1197 cast<IntrinsicInst>(MemInst.get())))
1198 return nullptr;
1199 }
1200
1201 if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
1202 !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
1203 MemInst.get()))
1204 return nullptr;
1205
1206 if (!Result)
1207 Result = getOrCreateResult(Matching, Other->getType());
1208 return Result;
1209}
1210
1211bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
1212 const ParseMemoryInst &Later) {
1213 // Can we remove Earlier store because of Later store?
1214
1215 assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
1216 "Violated invariant");
1217 if (Earlier.getPointerOperand() != Later.getPointerOperand())
1218 return false;
1219 if (!Earlier.getValueType() || !Later.getValueType() ||
1220 Earlier.getValueType() != Later.getValueType())
1221 return false;
1222 if (Earlier.getMatchingId() != Later.getMatchingId())
1223 return false;
1224 // At the moment, we don't remove ordered stores, but do remove
1225 // unordered atomic stores. There's no special requirement (for
1226 // unordered atomics) about removing atomic stores only in favor of
1227 // other atomic stores since we were going to execute the non-atomic
1228 // one anyway and the atomic one might never have become visible.
1229 if (!Earlier.isUnordered() || !Later.isUnordered())
1230 return false;
1231
1232 // Deal with non-target memory intrinsics.
1233 bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
1234 bool LNTI = isHandledNonTargetIntrinsic(Later.get());
1235 if (ENTI && LNTI)
1236 return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
1237 cast<IntrinsicInst>(Later.get()));
1238
1239 // Because of the check above, at least one of them is false.
1240 // For now disallow matching intrinsics with non-intrinsics,
1241 // so assume that the stores match if neither is an intrinsic.
1242 return ENTI == LNTI;
1243}
1244
1245bool EarlyCSE::processNode(DomTreeNode *Node) {
1246 bool Changed = false;
1247 BasicBlock *BB = Node->getBlock();
1248
1249 // If this block has a single predecessor, then the predecessor is the parent
1250 // of the domtree node and all of the live out memory values are still current
1251 // in this block. If this block has multiple predecessors, then they could
1252 // have invalidated the live-out memory values of our parent value. For now,
1253 // just be conservative and invalidate memory if this block has multiple
1254 // predecessors.
1255 if (!BB->getSinglePredecessor())
1256 ++CurrentGeneration;
1257
1258 // If this node has a single predecessor which ends in a conditional branch,
1259 // we can infer the value of the branch condition given that we took this
1260 // path. We need the single predecessor to ensure there's not another path
1261 // which reaches this block where the condition might hold a different
1262 // value. Since we're adding this to the scoped hash table (like any other
1263 // def), it will have been popped if we encounter a future merge block.
1264 if (BasicBlock *Pred = BB->getSinglePredecessor()) {
1265 auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
1266 if (BI && BI->isConditional()) {
1267 auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
1268 if (CondInst && SimpleValue::canHandle(CondInst))
1269 Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
1270 }
1271 }
1272
1273 /// LastStore - Keep track of the last non-volatile store that we saw... for
1274 /// as long as there is no instruction that reads memory. If we see a store
1275 /// to the same location, we delete the dead store. This zaps trivial dead
1276 /// stores which can occur in bitfield code among other things.
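  // Illustrative example (hypothetical IR, not from this file):
  //   store i32 %x, ptr %p
  //   store i32 %y, ptr %p   ; no intervening read, so the first store is dead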
1277 Instruction *LastStore = nullptr;
1278
1279 // See if any instructions in the block can be eliminated. If so, do it. If
1280 // not, add them to AvailableValues.
1281 for (Instruction &Inst : make_early_inc_range(*BB)) {
1282 // Dead instructions should just be removed.
1283 if (isInstructionTriviallyDead(&Inst, &TLI)) {
1284 LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');
1285 if (!DebugCounter::shouldExecute(CSECounter)) {
1286 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1287 continue;
1288 }
1289
1290 salvageKnowledge(&Inst, &AC);
1291 salvageDebugInfo(Inst);
1292 removeMSSA(Inst);
1293 Inst.eraseFromParent();
1294 Changed = true;
1295 ++NumSimplify;
1296 continue;
1297 }
1298
1299 // Skip assume intrinsics, they don't really have side effects (although
1300 // they're marked as such to ensure preservation of control dependencies),
1301 // and this pass will not bother with its removal. However, we should mark
1302 // its condition as true for all dominated blocks.
1303 if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
1304 auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
1305 if (CondI && SimpleValue::canHandle(CondI)) {
1306 LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
1307 << '\n');
1308 AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1309 } else
1310 LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
1311 continue;
1312 }
1313
1314 // Likewise, noalias intrinsics don't actually write.
1315 if (match(&Inst,
1316 m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
1317 LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
1318 << '\n');
1319 continue;
1320 }
1321
1322 // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
1323 if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
1324 LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
1325 continue;
1326 }
1327
1328 // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
1329 if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
1330 LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
1331 continue;
1332 }
1333
1334 // We can skip all invariant.start intrinsics since they only read memory,
1335 // and we can forward values across it. For invariant starts without
1336 // invariant ends, we can use the fact that the invariantness never ends to
1337 // start a scope in the current generation which is true for all future
1338 // generations. Also, we don't need to consume the last store since the
1339 // semantics of invariant.start allow us to perform DSE of the last
1340 // store, if there was a store following invariant.start. Consider:
1341 //
1342 // store 30, i8* p
1343 // invariant.start(p)
1344 // store 40, i8* p
1345 // We can DSE the store to 30, since the store 40 to invariant location p
1346 // causes undefined behaviour.
1347 if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
1348 // If there are any uses, the scope might end.
1349 if (!Inst.use_empty())
1350 continue;
1351 MemoryLocation MemLoc =
1352 MemoryLocation::getForArgument(&cast<CallInst>(Inst), 1, TLI);
1353 // Don't start a scope if we already have a better one pushed
1354 if (!AvailableInvariants.count(MemLoc))
1355 AvailableInvariants.insert(MemLoc, CurrentGeneration);
1356 continue;
1357 }
1358
1359 if (isGuard(&Inst)) {
1360 if (auto *CondI =
1361 dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
1362 if (SimpleValue::canHandle(CondI)) {
1363 // Do we already know the actual value of this condition?
1364 if (auto *KnownCond = AvailableValues.lookup(CondI)) {
1365 // Is the condition known to be true?
1366 if (isa<ConstantInt>(KnownCond) &&
1367 cast<ConstantInt>(KnownCond)->isOne()) {
1369 << "EarlyCSE removing guard: " << Inst << '\n');
1370 salvageKnowledge(&Inst, &AC);
1371 removeMSSA(Inst);
1372 Inst.eraseFromParent();
1373 Changed = true;
1374 continue;
1375 } else
1376 // Use the known value if it wasn't true.
1377 cast<CallInst>(Inst).setArgOperand(0, KnownCond);
1378 }
1379 // The condition we're on guarding here is true for all dominated
1380 // locations.
1381 AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1382 }
1383 }
1384
1385 // Guard intrinsics read all memory, but don't write any memory.
1386 // Accordingly, don't update the generation but consume the last store (to
1387 // avoid an incorrect DSE).
1388 LastStore = nullptr;
1389 continue;
1390 }
1391
1392 // If the instruction can be simplified (e.g. X+0 = X) then replace it with
1393 // its simpler value.
1394 if (Value *V = simplifyInstruction(&Inst, SQ)) {
1395 LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << " to: " << *V
1396 << '\n');
1397 if (!DebugCounter::shouldExecute(CSECounter)) {
1398 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1399 } else {
1400 bool Killed = false;
1401 if (!Inst.use_empty()) {
1402 Inst.replaceAllUsesWith(V);
1403 Changed = true;
1404 }
1405 if (isInstructionTriviallyDead(&Inst, &TLI)) {
1406 salvageKnowledge(&Inst, &AC);
1407 removeMSSA(Inst);
1408 Inst.eraseFromParent();
1409 Changed = true;
1410 Killed = true;
1411 }
1412 if (Changed)
1413 ++NumSimplify;
1414 if (Killed)
1415 continue;
1416 }
1417 }
1418
1419 // If this is a simple instruction that we can value number, process it.
1420 if (SimpleValue::canHandle(&Inst)) {
1421 if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
1422 assert(CI->getExceptionBehavior() != fp::ebStrict &&
1423 "Unexpected ebStrict from SimpleValue::canHandle()");
1424 assert((!CI->getRoundingMode() ||
1425 CI->getRoundingMode() != RoundingMode::Dynamic) &&
1426 "Unexpected dynamic rounding from SimpleValue::canHandle()");
1427 }
1428 // See if the instruction has an available value. If so, use it.
1429 if (Value *V = AvailableValues.lookup(&Inst)) {
1430 LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V
1431 << '\n');
1432 if (!DebugCounter::shouldExecute(CSECounter)) {
1433 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1434 continue;
1435 }
1436 if (auto *I = dyn_cast<Instruction>(V)) {
1437 // If I being poison triggers UB, there is no need to drop those
1438 // flags. Otherwise, only retain flags present on both I and Inst.
1439 // TODO: Currently some fast-math flags are not treated as
1440 // poison-generating even though they should. Until this is fixed,
1441 // always retain flags present on both I and Inst for floating point
1442 // instructions.
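          // For example (illustrative IR), CSE'ing 'add nsw i32 %a, %b' with a
          // plain 'add i32 %a, %b' drops the 'nsw' flag via andIRFlags below.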
1443 if (isa<FPMathOperator>(I) || (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
1444 I->andIRFlags(&Inst);
1445 }
1446 Inst.replaceAllUsesWith(V);
1447 salvageKnowledge(&Inst, &AC);
1448 removeMSSA(Inst);
1449 Inst.eraseFromParent();
1450 Changed = true;
1451 ++NumCSE;
1452 continue;
1453 }
1454
1455 // Otherwise, just remember that this value is available.
1456 AvailableValues.insert(&Inst, &Inst);
1457 continue;
1458 }
1459
1460 ParseMemoryInst MemInst(&Inst, TTI);
1461 // If this is a non-volatile load, process it.
1462 if (MemInst.isValid() && MemInst.isLoad()) {
1463 // (conservatively) we can't peek past the ordering implied by this
1464 // operation, but we can add this load to our set of available values
1465 if (MemInst.isVolatile() || !MemInst.isUnordered()) {
1466 LastStore = nullptr;
1467 ++CurrentGeneration;
1468 }
1469
1470 if (MemInst.isInvariantLoad()) {
1471 // If we pass an invariant load, we know that memory location is
1472 // indefinitely constant from the moment of first dereferenceability.
1473 // We conservatively treat the invariant_load as that moment. If we
1474 // pass an invariant load after already establishing a scope, don't
1475 // restart it since we want to preserve the earliest point seen.
1476 auto MemLoc = MemoryLocation::get(&Inst);
1477 if (!AvailableInvariants.count(MemLoc))
1478 AvailableInvariants.insert(MemLoc, CurrentGeneration);
1479 }
1480
1481 // If we have an available version of this load, and if it is the right
1482 // generation or the load is known to be from an invariant location,
1483 // replace this instruction.
1484 //
1485 // If either the dominating load or the current load are invariant, then
1486 // we can assume the current load loads the same value as the dominating
1487 // load.
1488 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1489 if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1490 LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
1491 << " to: " << *InVal.DefInst << '\n');
1492 if (!DebugCounter::shouldExecute(CSECounter)) {
1493 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1494 continue;
1495 }
1496 if (InVal.IsLoad)
1497 if (auto *I = dyn_cast<Instruction>(Op))
1498 combineMetadataForCSE(I, &Inst, false);
1499 if (!Inst.use_empty())
1500 Inst.replaceAllUsesWith(Op);
1501 salvageKnowledge(&Inst, &AC);
1502 removeMSSA(Inst);
1503 Inst.eraseFromParent();
1504 Changed = true;
1505 ++NumCSELoad;
1506 continue;
1507 }
1508
1509 // Otherwise, remember that we have this instruction.
1510 AvailableLoads.insert(MemInst.getPointerOperand(),
1511 LoadValue(&Inst, CurrentGeneration,
1512 MemInst.getMatchingId(),
1513 MemInst.isAtomic(),
1514 MemInst.isLoad()));
1515 LastStore = nullptr;
1516 continue;
1517 }
1518
1519 // If this instruction may read from memory or throw (and potentially read
1520 // from memory in the exception handler), forget LastStore. Load/store
1521 // intrinsics will indicate both a read and a write to memory. The target
1522 // may override this (e.g. so that a store intrinsic does not read from
1523 // memory, and thus will be treated the same as a regular store for
1524 // commoning purposes).
1525 if ((Inst.mayReadFromMemory() || Inst.mayThrow()) &&
1526 !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
1527 LastStore = nullptr;
1528
1529 // If this is a read-only call, process it.
1530 if (CallValue::canHandle(&Inst)) {
1531 // If we have an available version of this call, and if it is the right
1532 // generation, replace this instruction.
1533 std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
1534 if (InVal.first != nullptr &&
1535 isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
1536 &Inst)) {
1537 LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
1538 << " to: " << *InVal.first << '\n');
1539 if (!DebugCounter::shouldExecute(CSECounter)) {
1540 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1541 continue;
1542 }
1543 if (!Inst.use_empty())
1544 Inst.replaceAllUsesWith(InVal.first);
1545 salvageKnowledge(&Inst, &AC);
1546 removeMSSA(Inst);
1547 Inst.eraseFromParent();
1548 Changed = true;
1549 ++NumCSECall;
1550 continue;
1551 }
1552
1553 // Otherwise, remember that we have this instruction.
1554 AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
1555 continue;
1556 }
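// Sketch, assuming @f is a read-only call that CallValue::canHandle accepts
// (hypothetical names): with no memory generation change in between, the
// second call is replaced by the first.
//   %x = call i32 @f(i32 %n)
//   %y = call i32 @f(i32 %n)      ; CSE'd to %x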
1557
1558 // A release fence requires that all stores complete before it, but does
1559 // not prevent the reordering of following loads 'before' the fence. As a
1560 // result, we don't need to consider it as writing to memory and don't need
1561 // to advance the generation. We do need to prevent DSE across the fence,
1562 // but that's handled above.
1563 if (auto *FI = dyn_cast<FenceInst>(&Inst))
1564 if (FI->getOrdering() == AtomicOrdering::Release) {
1565 assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
1566 continue;
1567 }
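// Sketch (hypothetical %p, %v): the release fence neither bumps the
// generation nor allows the earlier store to be deleted, so the stored
// value can still be forwarded to the later load.
//   store i32 1, ptr %p
//   fence release
//   %v = load i32, ptr %p         ; may be replaced by the stored value 1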
1568
1569 // write back DSE - If we write back the same value we just loaded from
1570 // the same location and haven't passed any intervening writes or ordering
1571 // operations, we can remove the write. The primary benefit is in allowing
1572 // the available load table to remain valid and value forward past where
1573 // the store originally was.
1574 if (MemInst.isValid() && MemInst.isStore()) {
1575 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1576 if (InVal.DefInst &&
1577 InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1578 // It is okay to have a LastStore to a different pointer here if MemorySSA
1579 // tells us that the load and store are from the same memory generation.
1580 // In that case, LastStore should keep its present value since we're
1581 // removing the current store.
1582 assert((!LastStore ||
1583 ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
1584 MemInst.getPointerOperand() ||
1585 MSSA) &&
1586 "can't have an intervening store if not using MemorySSA!");
1587 LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
1588 if (!DebugCounter::shouldExecute(CSECounter)) {
1589 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1590 continue;
1591 }
1592 salvageKnowledge(&Inst, &AC);
1593 removeMSSA(Inst);
1594 Inst.eraseFromParent();
1595 Changed = true;
1596 ++NumDSE;
1597 // We can avoid incrementing the generation count since we were able
1598 // to eliminate this store.
1599 continue;
1600 }
1601 }
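// Sketch of the write-back case (hypothetical %p, %v): the store writes back
// the value just loaded from the same location with nothing in between, so
// the store itself is removed.
//   %v = load i32, ptr %p
//   store i32 %v, ptr %p          ; removable; %v stays available for reuse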
1602
1603 // Okay, this isn't something we can CSE at all. Check to see if it is
1604 // something that could modify memory. If so, our available memory values
1605 // cannot be used so bump the generation count.
1606 if (Inst.mayWriteToMemory()) {
1607 ++CurrentGeneration;
1608
1609 if (MemInst.isValid() && MemInst.isStore()) {
1610 // We do a trivial form of DSE if there are two stores to the same
1611 // location with no intervening loads. Delete the earlier store.
1612 if (LastStore) {
1613 if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
1614 LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1615 << " due to: " << Inst << '\n');
1616 if (!DebugCounter::shouldExecute(CSECounter)) {
1617 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1618 } else {
1619 salvageKnowledge(&Inst, &AC);
1620 removeMSSA(*LastStore);
1621 LastStore->eraseFromParent();
1622 Changed = true;
1623 ++NumDSE;
1624 LastStore = nullptr;
1625 }
1626 }
1627 // fallthrough - we can exploit information about this store
1628 }
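// Sketch of the overriding-store case (hypothetical %p): no read separates
// the two stores, so the earlier one is deleted.
//   store i32 1, ptr %p           ; dead, removed
//   store i32 2, ptr %p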
1629
1630 // Okay, we just invalidated anything we knew about loaded values. Try
1631 // to salvage *something* by remembering that the stored value is a live
1632 // version of the pointer. It is safe to forward from volatile stores
1633 // to non-volatile loads, so we don't have to check for volatility of
1634 // the store.
1635 AvailableLoads.insert(MemInst.getPointerOperand(),
1636 LoadValue(&Inst, CurrentGeneration,
1637 MemInst.getMatchingId(),
1638 MemInst.isAtomic(),
1639 MemInst.isLoad()));
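// Sketch of store-to-load forwarding enabled by this entry (hypothetical
// %p, %x, %v): a later load of the same pointer in the same generation is
// replaced by the stored value, even if the store is volatile.
//   store volatile i32 %x, ptr %p
//   %v = load i32, ptr %p         ; may be replaced by %x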
1640
1641 // Remember that this was the last unordered store we saw for DSE. We
1642 // don't yet handle DSE on ordered or volatile stores since we don't
1643 // have a good way to model the ordering requirement for following
1644 // passes once the store is removed. We could insert a fence, but
1645 // since fences are slightly stronger than stores in their ordering,
1646 // it's not clear this is a profitable transform. Another option would
1647 // be to merge the ordering with that of the post dominating store.
1648 if (MemInst.isUnordered() && !MemInst.isVolatile())
1649 LastStore = &Inst;
1650 else
1651 LastStore = nullptr;
1652 }
1653 }
1654 }
1655
1656 return Changed;
1657}
1658
1659bool EarlyCSE::run() {
1660 // Note, deque is being used here because there are significant performance
1661 // gains over vector when the container becomes very large due to the
1662 // specific access patterns. For more information see the mailing list
1663 // discussion on this:
1664 // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1665 std::deque<StackNode *> nodesToProcess;
1666
1667 bool Changed = false;
1668
1669 // Process the root node.
1670 nodesToProcess.push_back(new StackNode(
1671 AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1672 CurrentGeneration, DT.getRootNode(),
1673 DT.getRootNode()->begin(), DT.getRootNode()->end()));
1674
1675 assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
1676
1677 // Process the stack.
1678 while (!nodesToProcess.empty()) {
1679 // Grab the first item off the stack. Set the current generation, remove
1680 // the node from the stack, and process it.
1681 StackNode *NodeToProcess = nodesToProcess.back();
1682
1683 // Initialize class members.
1684 CurrentGeneration = NodeToProcess->currentGeneration();
1685
1686 // Check if the node needs to be processed.
1687 if (!NodeToProcess->isProcessed()) {
1688 // Process the node.
1689 Changed |= processNode(NodeToProcess->node());
1690 NodeToProcess->childGeneration(CurrentGeneration);
1691 NodeToProcess->process();
1692 } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
1693 // Push the next child onto the stack.
1694 DomTreeNode *child = NodeToProcess->nextChild();
1695 nodesToProcess.push_back(
1696 new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
1697 AvailableCalls, NodeToProcess->childGeneration(),
1698 child, child->begin(), child->end()));
1699 } else {
1700 // It has been processed, and there are no more children to process,
1701 // so delete it and pop it off the stack.
1702 delete NodeToProcess;
1703 nodesToProcess.pop_back();
1704 }
1705 } // while (!nodes...)
1706
1707 return Changed;
1708}
1709
1710PreservedAnalyses EarlyCSEPass::run(Function &F,
1711 FunctionAnalysisManager &AM) {
1712 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1713 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1714 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1715 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1716 auto *MSSA =
1717 UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
1718
1719 EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1720
1721 if (!CSE.run())
1722 return PreservedAnalyses::all();
1723
1724 PreservedAnalyses PA;
1725 PA.preserveSet<CFGAnalyses>();
1726 if (UseMemorySSA)
1727 PA.preserve<MemorySSAAnalysis>();
1728 return PA;
1729}
1730
1731void EarlyCSEPass::printPipeline(
1732 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1733 static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
1734 OS, MapClassName2PassName);
1735 OS << '<';
1736 if (UseMemorySSA)
1737 OS << "memssa";
1738 OS << '>';
1739}
1740
1741namespace {
1742
1743/// A simple and fast domtree-based CSE pass.
1744///
1745/// This pass does a simple depth-first walk over the dominator tree,
1746/// eliminating trivially redundant instructions and using instsimplify to
1747/// canonicalize things as it goes. It is intended to be fast and catch obvious
1748/// cases so that instcombine and other passes are more effective. It is
1749/// expected that a later pass of GVN will catch the interesting/hard cases.
1750template<bool UseMemorySSA>
1751class EarlyCSELegacyCommonPass : public FunctionPass {
1752public:
1753 static char ID;
1754
1755 EarlyCSELegacyCommonPass() : FunctionPass(ID) {
1756 if (UseMemorySSA)
1757 initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
1758 else
1759 initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
1760 }
1761
1762 bool runOnFunction(Function &F) override {
1763 if (skipFunction(F))
1764 return false;
1765
1766 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1767 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1768 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1769 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1770 auto *MSSA =
1771 UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
1772
1773 EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1774
1775 return CSE.run();
1776 }
1777
1778 void getAnalysisUsage(AnalysisUsage &AU) const override {
1779 AU.addRequired<AssumptionCacheTracker>();
1780 AU.addRequired<DominatorTreeWrapperPass>();
1781 AU.addRequired<TargetLibraryInfoWrapperPass>();
1782 AU.addRequired<TargetTransformInfoWrapperPass>();
1783 if (UseMemorySSA) {
1784 AU.addRequired<AAResultsWrapperPass>();
1785 AU.addRequired<MemorySSAWrapperPass>();
1786 AU.addPreserved<MemorySSAWrapperPass>();
1787 }
1788 AU.addPreserved<GlobalsAAWrapperPass>();
1789 AU.addPreserved<AAResultsWrapperPass>();
1790 AU.setPreservesCFG();
1791 }
1792};
1793
1794} // end anonymous namespace
1795
1796using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1797
1798template<>
1799char EarlyCSELegacyPass::ID = 0;
1800
1801INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
1802 false)
1803INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1804INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1805INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1806INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1807INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
1808
1809using EarlyCSEMemSSALegacyPass =
1810 EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1811
1812template<>
1813char EarlyCSEMemSSALegacyPass::ID = 0;
1814
1815FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
1816 if (UseMemorySSA)
1817 return new EarlyCSEMemSSALegacyPass();
1818 else
1819 return new EarlyCSELegacyPass();
1820}
1821
1822INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1823 "Early CSE w/ MemorySSA", false, false)
1830INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1831 "Early CSE w/ MemorySSA", false, false)