LLVM 23.0.0git
MachineCSE.cpp
Go to the documentation of this file.
1//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global common subexpression elimination on machine
10// instructions using a scoped hash table based value numbering scheme. It
11// must be run while the machine function is still in SSA form.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/SmallSet.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/CFG.h"
32#include "llvm/CodeGen/Passes.h"
38#include "llvm/MC/MCRegister.h"
40#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
45#include <cassert>
46#include <iterator>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "machine-cse"
52
53STATISTIC(NumCoalesces, "Number of copies coalesced");
54STATISTIC(NumCSEs, "Number of common subexpression eliminated");
55STATISTIC(NumPREs, "Number of partial redundant expression"
56 " transformed to fully redundant");
57STATISTIC(NumPhysCSEs,
58 "Number of physreg referencing common subexpr eliminated");
59STATISTIC(NumCrossBBCSEs,
60 "Number of cross-MBB physreg referencing CS eliminated");
61STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
62
63// Threshold to avoid excessive cost to compute isProfitableToCSE.
64static cl::opt<int>
65 CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
66 cl::desc("Threshold for the size of CSUses"));
67
69 "aggressive-machine-cse", cl::Hidden, cl::init(false),
70 cl::desc("Override the profitability heuristics for Machine CSE"));
71
72namespace {
73
74class MachineCSEImpl {
75 const TargetInstrInfo *TII = nullptr;
76 const TargetRegisterInfo *TRI = nullptr;
77 MachineDominatorTree *DT = nullptr;
78 MachineRegisterInfo *MRI = nullptr;
79 MachineBlockFrequencyInfo *MBFI = nullptr;
80
81public:
82 MachineCSEImpl(MachineDominatorTree *DT, MachineBlockFrequencyInfo *MBFI)
83 : DT(DT), MBFI(MBFI) {}
84 bool run(MachineFunction &MF);
85
86private:
87 using AllocatorTy =
88 RecyclingAllocator<BumpPtrAllocator,
89 ScopedHashTableVal<MachineInstr *, unsigned>>;
90 using ScopedHTType =
91 ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
92 AllocatorTy>;
93 using ScopeType = ScopedHTType::ScopeTy;
94 using PhysDefVector = SmallVector<std::pair<unsigned, Register>, 2>;
95
96 unsigned LookAheadLimit = 0;
97 DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
98 DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
99 PREMap;
100 ScopedHTType VNT;
102 unsigned CurrVN = 0;
103
104 bool PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB);
105 bool isPhysDefTriviallyDead(MCRegister Reg,
108 bool hasLivePhysRegDefUses(const MachineInstr *MI,
109 const MachineBasicBlock *MBB,
110 SmallSet<MCRegister, 8> &PhysRefs,
111 PhysDefVector &PhysDefs, bool &PhysUseDef) const;
112 bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
113 const SmallSet<MCRegister, 8> &PhysRefs,
114 const PhysDefVector &PhysDefs, bool &NonLocal) const;
115 bool isCSECandidate(MachineInstr *MI);
116 bool isProfitableToCSE(Register CSReg, Register Reg, MachineBasicBlock *CSBB,
117 MachineInstr *MI);
118 void EnterScope(MachineBasicBlock *MBB);
119 void ExitScope(MachineBasicBlock *MBB);
120 bool ProcessBlockCSE(MachineBasicBlock *MBB);
121 void ExitScopeIfDone(MachineDomTreeNode *Node,
122 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren);
123 bool PerformCSE(MachineDomTreeNode *Node);
124
125 bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
126 bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
127 bool PerformSimplePRE(MachineDominatorTree *DT);
128 /// Heuristics to see if it's profitable to move common computations of MBB
129 /// and MBB1 to CandidateBB.
130 bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
131 MachineBasicBlock *MBB, MachineBasicBlock *MBB1);
132 void releaseMemory();
133};
134
135class MachineCSELegacy : public MachineFunctionPass {
136public:
137 static char ID; // Pass identification
138
139 MachineCSELegacy() : MachineFunctionPass(ID) {}
140
141 bool runOnMachineFunction(MachineFunction &MF) override;
142
143 void getAnalysisUsage(AnalysisUsage &AU) const override {
144 AU.setPreservesCFG();
147 AU.addRequired<MachineDominatorTreeWrapperPass>();
148 AU.addPreserved<MachineDominatorTreeWrapperPass>();
149 AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
150 AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
151 }
152
153 MachineFunctionProperties getRequiredProperties() const override {
154 return MachineFunctionProperties().setIsSSA();
155 }
156};
157} // end anonymous namespace
158
159char MachineCSELegacy::ID = 0;
160
161char &llvm::MachineCSELegacyID = MachineCSELegacy::ID;
162
164 "Machine Common Subexpression Elimination", false, false)
167 "Machine Common Subexpression Elimination", false, false)
168
169/// The source register of a COPY machine instruction can be propagated to all
170/// its users, and this propagation could increase the probability of finding
171/// common subexpressions. If the COPY has only one user, the COPY itself can
172/// be removed.
173bool MachineCSEImpl::PerformTrivialCopyPropagation(MachineInstr *MI,
175 bool Changed = false;
176 for (MachineOperand &MO : MI->all_uses()) {
177 Register Reg = MO.getReg();
178 if (!Reg.isVirtual())
179 continue;
180 bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
181 MachineInstr *DefMI = MRI->getVRegDef(Reg);
182 if (!DefMI || !DefMI->isCopy())
183 continue;
184 Register SrcReg = DefMI->getOperand(1).getReg();
185 if (!SrcReg.isVirtual())
186 continue;
187 // FIXME: We should trivially coalesce subregister copies to expose CSE
188 // opportunities on instructions with truncated operands (see
189 // cse-add-with-overflow.ll). This can be done here as follows:
190 // if (SrcSubReg)
191 // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
192 // SrcSubReg);
193 // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
194 //
195 // The 2-addr pass has been updated to handle coalesced subregs. However,
196 // some machine-specific code still can't handle it.
197 // To handle it properly we also need a way find a constrained subregister
198 // class given a super-reg class and subreg index.
199 if (DefMI->getOperand(1).getSubReg())
200 continue;
201 if (!MRI->constrainRegAttrs(SrcReg, Reg))
202 continue;
203 LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
204 LLVM_DEBUG(dbgs() << "*** to: " << *MI);
205
206 // Propagate SrcReg of copies to MI.
207 MO.setReg(SrcReg);
208 MRI->clearKillFlags(SrcReg);
209 // Coalesce single use copies.
210 if (OnlyOneUse) {
211 // If (and only if) we've eliminated all uses of the copy, also
212 // copy-propagate to any debug-users of MI, or they'll be left using
213 // an undefined value.
214 DefMI->changeDebugValuesDefReg(SrcReg);
215
216 DefMI->eraseFromParent();
217 ++NumCoalesces;
218 }
219 Changed = true;
220 }
221
222 return Changed;
223}
224
225bool MachineCSEImpl::isPhysDefTriviallyDead(
228 unsigned LookAheadLeft = LookAheadLimit;
229 while (LookAheadLeft) {
230 // Skip over dbg_value's.
232
233 if (I == E)
234 // Reached end of block, we don't know if register is dead or not.
235 return false;
236
237 bool SeenDef = false;
238 for (const MachineOperand &MO : I->operands()) {
239 if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
240 SeenDef = true;
241 if (!MO.isReg() || !MO.getReg())
242 continue;
243 if (!TRI->regsOverlap(MO.getReg(), Reg))
244 continue;
245 if (MO.isUse())
246 // Found a use!
247 return false;
248 SeenDef = true;
249 }
250 if (SeenDef)
251 // See a def of Reg (or an alias) before encountering any use, it's
252 // trivially dead.
253 return true;
254
255 --LookAheadLeft;
256 ++I;
257 }
258 return false;
259}
260
262 const MachineOperand &MO,
263 const MachineFunction &MF,
264 const TargetRegisterInfo &TRI,
265 const TargetInstrInfo &TII) {
266 // MachineRegisterInfo::isConstantPhysReg directly called by
267 // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
268 // reserved registers to be frozen. That doesn't cause a problem post-ISel as
269 // most (if not all) targets freeze reserved registers right after ISel.
270 //
271 // It does cause issues mid-GlobalISel, however, hence the additional
272 // reservedRegsFrozen check.
273 const MachineRegisterInfo &MRI = MF.getRegInfo();
274 return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) ||
275 (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
276}
277
278/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
279/// physical registers (except for dead defs of physical registers). It also
280/// returns the physical register def by reference if it's the only one and the
281/// instruction does not uses a physical register.
282bool MachineCSEImpl::hasLivePhysRegDefUses(const MachineInstr *MI,
283 const MachineBasicBlock *MBB,
284 SmallSet<MCRegister, 8> &PhysRefs,
285 PhysDefVector &PhysDefs,
286 bool &PhysUseDef) const {
287 // First, add all uses to PhysRefs.
288 for (const MachineOperand &MO : MI->all_uses()) {
289 Register Reg = MO.getReg();
290 if (!Reg)
291 continue;
292 if (Reg.isVirtual())
293 continue;
294 // Reading either caller preserved or constant physregs is ok.
295 if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI,
296 *TII))
297 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
298 PhysRefs.insert(*AI);
299 }
300
301 // Next, collect all defs into PhysDefs. If any is already in PhysRefs
302 // (which currently contains only uses), set the PhysUseDef flag.
303 PhysUseDef = false;
304 MachineBasicBlock::const_iterator I = MI; I = std::next(I);
305 for (const auto &MOP : llvm::enumerate(MI->operands())) {
306 const MachineOperand &MO = MOP.value();
307 if (!MO.isReg() || !MO.isDef())
308 continue;
309 Register Reg = MO.getReg();
310 if (!Reg)
311 continue;
312 if (Reg.isVirtual())
313 continue;
314 // Check against PhysRefs even if the def is "dead".
315 if (PhysRefs.count(Reg.asMCReg()))
316 PhysUseDef = true;
317 // If the def is dead, it's ok. But the def may not marked "dead". That's
318 // common since this pass is run before livevariables. We can scan
319 // forward a few instructions and check if it is obviously dead.
320 if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
321 PhysDefs.emplace_back(MOP.index(), Reg);
322 }
323
324 // Finally, add all defs to PhysRefs as well.
325 for (const auto &Def : PhysDefs)
326 for (MCRegAliasIterator AI(Def.second, TRI, true); AI.isValid(); ++AI)
327 PhysRefs.insert(*AI);
328
329 return !PhysRefs.empty();
330}
331
332bool MachineCSEImpl::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
333 const SmallSet<MCRegister, 8> &PhysRefs,
334 const PhysDefVector &PhysDefs,
335 bool &NonLocal) const {
336 // For now conservatively returns false if the common subexpression is
337 // not in the same basic block as the given instruction. The only exception
338 // is if the common subexpression is in the sole predecessor block.
339 const MachineBasicBlock *MBB = MI->getParent();
340 const MachineBasicBlock *CSMBB = CSMI->getParent();
341
342 bool CrossMBB = false;
343 if (CSMBB != MBB) {
344 if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
345 return false;
346
347 for (const auto &PhysDef : PhysDefs) {
348 if (MRI->isAllocatable(PhysDef.second) || MRI->isReserved(PhysDef.second))
349 // Avoid extending live range of physical registers if they are
350 //allocatable or reserved.
351 return false;
352 }
353 CrossMBB = true;
354 }
355 MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
358 unsigned LookAheadLeft = LookAheadLimit;
359 while (LookAheadLeft) {
360 // Skip over dbg_value's.
361 while (I != E && I != EE && I->isDebugInstr())
362 ++I;
363
364 if (I == EE) {
365 assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
366 (void)CrossMBB;
367 CrossMBB = false;
368 NonLocal = true;
369 I = MBB->begin();
370 EE = MBB->end();
371 continue;
372 }
373
374 if (I == E)
375 return true;
376
377 for (const MachineOperand &MO : I->operands()) {
378 // RegMasks go on instructions like calls that clobber lots of physregs.
379 // Don't attempt to CSE across such an instruction.
380 if (MO.isRegMask())
381 return false;
382 if (!MO.isReg() || !MO.isDef())
383 continue;
384 Register MOReg = MO.getReg();
385 if (MOReg.isVirtual())
386 continue;
387 if (PhysRefs.count(MOReg.asMCReg()))
388 return false;
389 }
390
391 --LookAheadLeft;
392 ++I;
393 }
394
395 return false;
396}
397
398bool MachineCSEImpl::isCSECandidate(MachineInstr *MI) {
399 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
400 MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo() ||
401 MI->isFakeUse())
402 return false;
403
404 // Ignore copies.
405 if (MI->isCopyLike())
406 return false;
407
408 // Ignore stuff that we obviously can't move.
409 if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
410 MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
411 return false;
412
413 if (MI->mayLoad()) {
414 // Okay, this instruction does a load. As a refinement, we allow the target
415 // to decide whether the loaded value is actually a constant. If so, we can
416 // actually use it as a load.
417 if (!MI->isDereferenceableInvariantLoad())
418 // FIXME: we should be able to hoist loads with no other side effects if
419 // there are no other instructions which can change memory in this loop.
420 // This is a trivial form of alias analysis.
421 return false;
422 }
423
424 // Ignore stack guard loads, otherwise the register that holds CSEed value may
425 // be spilled and get loaded back with corrupted data.
426 if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
427 return false;
428
429 return true;
430}
431
432/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
433/// common expression that defines Reg. CSBB is basic block where CSReg is
434/// defined.
435bool MachineCSEImpl::isProfitableToCSE(Register CSReg, Register Reg,
436 MachineBasicBlock *CSBB,
437 MachineInstr *MI) {
439 return true;
440
441 // FIXME: Heuristics that works around the lack the live range splitting.
442
443 // If CSReg is used at all uses of Reg, CSE should not increase register
444 // pressure of CSReg.
445 bool MayIncreasePressure = true;
446 if (CSReg.isVirtual() && Reg.isVirtual()) {
447 MayIncreasePressure = false;
448 SmallPtrSet<MachineInstr*, 8> CSUses;
449 int NumOfUses = 0;
450 for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
451 CSUses.insert(&MI);
452 // Too costly to compute if NumOfUses is very large. Conservatively assume
453 // MayIncreasePressure to avoid spending too much time here.
454 if (++NumOfUses > CSUsesThreshold) {
455 MayIncreasePressure = true;
456 break;
457 }
458 }
459 if (!MayIncreasePressure)
460 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
461 if (!CSUses.count(&MI)) {
462 MayIncreasePressure = true;
463 break;
464 }
465 }
466 }
467 if (!MayIncreasePressure) return true;
468
469 // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
470 // an immediate predecessor. We don't want to increase register pressure and
471 // end up causing other computation to be spilled.
472 if (TII->isAsCheapAsAMove(*MI)) {
473 MachineBasicBlock *BB = MI->getParent();
474 if (CSBB != BB && !CSBB->isSuccessor(BB))
475 return false;
476 }
477
478 // Heuristics #2: If the expression doesn't not use a vr and the only use
479 // of the redundant computation are copies, do not cse.
480 bool HasVRegUse = false;
481 for (const MachineOperand &MO : MI->all_uses()) {
482 if (MO.getReg().isVirtual()) {
483 HasVRegUse = true;
484 break;
485 }
486 }
487 if (!HasVRegUse) {
488 bool HasNonCopyUse = false;
489 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
490 // Ignore copies.
491 if (!MI.isCopyLike()) {
492 HasNonCopyUse = true;
493 break;
494 }
495 }
496 if (!HasNonCopyUse)
497 return false;
498 }
499
500 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
501 // it unless the defined value is already used in the BB of the new use.
502 bool HasPHI = false;
503 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
504 HasPHI |= UseMI.isPHI();
505 if (UseMI.getParent() == MI->getParent())
506 return true;
507 }
508
509 return !HasPHI;
510}
511
512void MachineCSEImpl::EnterScope(MachineBasicBlock *MBB) {
513 LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
514 ScopeType *Scope = new ScopeType(VNT);
515 ScopeMap[MBB] = Scope;
516}
517
518void MachineCSEImpl::ExitScope(MachineBasicBlock *MBB) {
519 LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
520 DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
521 assert(SI != ScopeMap.end());
522 delete SI->second;
523 ScopeMap.erase(SI);
524}
525
526bool MachineCSEImpl::ProcessBlockCSE(MachineBasicBlock *MBB) {
527 bool Changed = false;
528
530 SmallVector<unsigned, 2> ImplicitDefsToUpdate;
531 SmallVector<Register, 2> ImplicitDefs;
532 for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
533 if (!isCSECandidate(&MI))
534 continue;
535
536 bool FoundCSE = VNT.count(&MI);
537 if (!FoundCSE) {
538 // Using trivial copy propagation to find more CSE opportunities.
539 if (PerformTrivialCopyPropagation(&MI, MBB)) {
540 Changed = true;
541
542 // After coalescing MI itself may become a copy.
543 if (MI.isCopyLike())
544 continue;
545
546 // Try again to see if CSE is possible.
547 FoundCSE = VNT.count(&MI);
548 }
549 }
550
551 // Commute commutable instructions.
552 bool Commuted = false;
553 if (!FoundCSE && MI.isCommutable()) {
554 if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
555 Commuted = true;
556 FoundCSE = VNT.count(NewMI);
557 if (NewMI != &MI) {
558 // New instruction. It doesn't need to be kept.
559 NewMI->eraseFromParent();
560 Changed = true;
561 } else if (!FoundCSE)
562 // MI was changed but it didn't help, commute it back!
563 (void)TII->commuteInstruction(MI);
564 }
565 }
566
567 // If the instruction defines physical registers and the values *may* be
568 // used, then it's not safe to replace it with a common subexpression.
569 // It's also not safe if the instruction uses physical registers.
570 bool CrossMBBPhysDef = false;
571 SmallSet<MCRegister, 8> PhysRefs;
572 PhysDefVector PhysDefs;
573 bool PhysUseDef = false;
574 if (FoundCSE &&
575 hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
576 FoundCSE = false;
577
578 // ... Unless the CS is local or is in the sole predecessor block
579 // and it also defines the physical register which is not clobbered
580 // in between and the physical register uses were not clobbered.
581 // This can never be the case if the instruction both uses and
582 // defines the same physical register, which was detected above.
583 if (!PhysUseDef) {
584 unsigned CSVN = VNT.lookup(&MI);
585 MachineInstr *CSMI = Exps[CSVN];
586 if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
587 FoundCSE = true;
588 }
589 }
590
591 if (!FoundCSE) {
592 VNT.insert(&MI, CurrVN++);
593 Exps.push_back(&MI);
594 continue;
595 }
596
597 // Found a common subexpression, eliminate it.
598 unsigned CSVN = VNT.lookup(&MI);
599 MachineInstr *CSMI = Exps[CSVN];
600 LLVM_DEBUG(dbgs() << "Examining: " << MI);
601 LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
602
603 // Prevent CSE-ing non-local convergent instructions.
604 // LLVM's current definition of `isConvergent` does not necessarily prove
605 // that non-local CSE is illegal. The following check extends the definition
606 // of `isConvergent` to assume a convergent instruction is dependent not
607 // only on additional conditions, but also on fewer conditions. LLVM does
608 // not have a MachineInstr attribute which expresses this extended
609 // definition, so it's necessary to use `isConvergent` to prevent illegally
610 // CSE-ing the subset of `isConvergent` instructions which do fall into this
611 // extended definition.
612 if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
613 LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
614 "different BBs, avoid CSE!\n");
615 VNT.insert(&MI, CurrVN++);
616 Exps.push_back(&MI);
617 continue;
618 }
619
620 // Check if it's profitable to perform this CSE.
621 bool DoCSE = true;
622 unsigned NumDefs = MI.getNumDefs();
623
624 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
625 MachineOperand &MO = MI.getOperand(i);
626 if (!MO.isReg() || !MO.isDef())
627 continue;
628 Register OldReg = MO.getReg();
629 Register NewReg = CSMI->getOperand(i).getReg();
630
631 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
632 // we should make sure it is not dead at CSMI.
633 if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
634 ImplicitDefsToUpdate.push_back(i);
635
636 // Keep track of implicit defs of CSMI and MI, to clear possibly
637 // made-redundant kill flags.
638 if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
639 ImplicitDefs.push_back(OldReg);
640
641 if (OldReg == NewReg) {
642 --NumDefs;
643 continue;
644 }
645
646 assert(OldReg.isVirtual() && NewReg.isVirtual() &&
647 "Do not CSE physical register defs!");
648
649 if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
650 LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
651 DoCSE = false;
652 break;
653 }
654
655 // Don't perform CSE if the result of the new instruction cannot exist
656 // within the constraints (register class, bank, or low-level type) of
657 // the old instruction.
658 if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
660 dbgs() << "*** Not the same register constraints, avoid CSE!\n");
661 DoCSE = false;
662 break;
663 }
664
665 CSEPairs.emplace_back(OldReg, NewReg);
666 --NumDefs;
667 }
668
669 // Actually perform the elimination.
670 if (DoCSE) {
671 for (const std::pair<Register, Register> &CSEPair : CSEPairs) {
672 Register OldReg = CSEPair.first;
673 Register NewReg = CSEPair.second;
674 // OldReg may have been unused but is used now, clear the Dead flag
675 MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
676 assert(Def != nullptr && "CSEd register has no unique definition?");
677 Def->clearRegisterDeads(NewReg);
678 // Replace with NewReg and clear kill flags which may be wrong now.
679 MRI->replaceRegWith(OldReg, NewReg);
680 MRI->clearKillFlags(NewReg);
681 }
682
683 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
684 // we should make sure it is not dead at CSMI.
685 for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
686 CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
687 for (const auto &PhysDef : PhysDefs)
688 if (!MI.getOperand(PhysDef.first).isDead())
689 CSMI->getOperand(PhysDef.first).setIsDead(false);
690
691 // Go through implicit defs of CSMI and MI, and clear the kill flags on
692 // their uses in all the instructions between CSMI and MI.
693 // We might have made some of the kill flags redundant, consider:
694 // subs ... implicit-def %nzcv <- CSMI
695 // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
696 // subs ... implicit-def %nzcv <- MI, to be eliminated
697 // csinc ... implicit killed %nzcv
698 // Since we eliminated MI, and reused a register imp-def'd by CSMI
699 // (here %nzcv), that register, if it was killed before MI, should have
700 // that kill flag removed, because it's lifetime was extended.
701 if (CSMI->getParent() == MI.getParent()) {
702 for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
703 for (auto ImplicitDef : ImplicitDefs)
704 if (MachineOperand *MO = II->findRegisterUseOperand(
705 ImplicitDef, TRI, /*isKill=*/true))
706 MO->setIsKill(false);
707 } else {
708 // If the instructions aren't in the same BB, bail out and clear the
709 // kill flag on all uses of the imp-def'd register.
710 for (auto ImplicitDef : ImplicitDefs)
711 MRI->clearKillFlags(ImplicitDef);
712 }
713
714 if (CrossMBBPhysDef) {
715 // Add physical register defs now coming in from a predecessor to MBB
716 // livein list.
717 while (!PhysDefs.empty()) {
718 auto LiveIn = PhysDefs.pop_back_val();
719 if (!MBB->isLiveIn(LiveIn.second))
720 MBB->addLiveIn(LiveIn.second);
721 }
722 ++NumCrossBBCSEs;
723 }
724
725 MI.eraseFromParent();
726 ++NumCSEs;
727 if (!PhysRefs.empty())
728 ++NumPhysCSEs;
729 if (Commuted)
730 ++NumCommutes;
731 Changed = true;
732 } else {
733 VNT.insert(&MI, CurrVN++);
734 Exps.push_back(&MI);
735 }
736 CSEPairs.clear();
737 ImplicitDefsToUpdate.clear();
738 ImplicitDefs.clear();
739 }
740
741 return Changed;
742}
743
744/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
745/// dominator tree node if its a leaf or all of its children are done. Walk
746/// up the dominator tree to destroy ancestors which are now done.
747void MachineCSEImpl::ExitScopeIfDone(
748 MachineDomTreeNode *Node,
749 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren) {
750 if (OpenChildren[Node])
751 return;
752
753 // Pop scope.
754 ExitScope(Node->getBlock());
755
756 // Now traverse upwards to pop ancestors whose offsprings are all done.
757 while (MachineDomTreeNode *Parent = Node->getIDom()) {
758 unsigned Left = --OpenChildren[Parent];
759 if (Left != 0)
760 break;
761 ExitScope(Parent->getBlock());
762 Node = Parent;
763 }
764}
765
766bool MachineCSEImpl::PerformCSE(MachineDomTreeNode *Node) {
769 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
770
771 CurrVN = 0;
772
773 // Perform a DFS walk to determine the order of visit.
774 WorkList.push_back(Node);
775 do {
776 Node = WorkList.pop_back_val();
777 Scopes.push_back(Node);
778 size_t WorkListSize = WorkList.size();
779 append_range(WorkList, Node->children());
780 OpenChildren[Node] = WorkList.size() - WorkListSize; // Number of children.
781 } while (!WorkList.empty());
782
783 // Now perform CSE.
784 bool Changed = false;
785 for (MachineDomTreeNode *Node : Scopes) {
786 MachineBasicBlock *MBB = Node->getBlock();
787 EnterScope(MBB);
788 Changed |= ProcessBlockCSE(MBB);
789 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
790 ExitScopeIfDone(Node, OpenChildren);
791 }
792
793 return Changed;
794}
795
796// We use stronger checks for PRE candidate rather than for CSE ones to embrace
797// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
798// to exclude instrs created by PRE that won't be CSEed later.
799bool MachineCSEImpl::isPRECandidate(MachineInstr *MI,
800 SmallSet<MCRegister, 8> &PhysRefs) {
801 if (!isCSECandidate(MI) ||
802 MI->isNotDuplicable() ||
803 MI->mayLoad() ||
805 MI->getNumDefs() != 1 ||
806 MI->getNumExplicitDefs() != 1)
807 return false;
808
809 for (const MachineOperand &MO : MI->operands()) {
810 if (MO.isReg() && !MO.getReg().isVirtual()) {
811 if (MO.isDef())
812 return false;
813 else
814 PhysRefs.insert(MO.getReg());
815 }
816 }
817
818 return true;
819}
820
821bool MachineCSEImpl::ProcessBlockPRE(MachineDominatorTree *DT,
822 MachineBasicBlock *MBB) {
823 bool Changed = false;
824 for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
825 SmallSet<MCRegister, 8> PhysRefs;
826 if (!isPRECandidate(&MI, PhysRefs))
827 continue;
828
829 auto [It, Inserted] = PREMap.try_emplace(&MI, MBB);
830 if (Inserted)
831 continue;
832
833 auto *MBB1 = It->second;
834 assert(
835 !DT->properlyDominates(MBB, MBB1) &&
836 "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
837 auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
838 if (!CMBB->isLegalToHoistInto())
839 continue;
840
841 if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
842 continue;
843
844 // Two instrs are partial redundant if their basic blocks are reachable
845 // from one to another but one doesn't dominate another.
846 if (CMBB != MBB1) {
847 auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
848 if (BB != nullptr && BB1 != nullptr &&
849 (isPotentiallyReachable(BB1, BB) ||
850 isPotentiallyReachable(BB, BB1))) {
851 // The following check extends the definition of `isConvergent` to
852 // assume a convergent instruction is dependent not only on additional
853 // conditions, but also on fewer conditions. LLVM does not have a
854 // MachineInstr attribute which expresses this extended definition, so
855 // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
856 // subset of `isConvergent` instructions which do fall into this
857 // extended definition.
858 if (MI.isConvergent() && CMBB != MBB)
859 continue;
860
861 // If this instruction uses physical registers then we can only do PRE
862 // if it's using the value that is live at the place we're hoisting to.
863 bool NonLocal;
864 PhysDefVector PhysDefs;
865 if (!PhysRefs.empty() &&
866 !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs,
867 PhysDefs, NonLocal))
868 continue;
869
870 assert(MI.getOperand(0).isDef() &&
871 "First operand of instr with one explicit def must be this def");
872 Register VReg = MI.getOperand(0).getReg();
873 Register NewReg = MRI->cloneVirtualRegister(VReg);
874 if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
875 continue;
876 MachineInstr &NewMI =
877 TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
878
879 // When hoisting, make sure we don't carry the debug location of
880 // the original instruction, as that's not correct and can cause
881 // unexpected jumps when debugging optimized code.
882 auto EmptyDL = DebugLoc();
883 NewMI.setDebugLoc(EmptyDL);
884
885 NewMI.getOperand(0).setReg(NewReg);
886
887 PREMap[&MI] = CMBB;
888 ++NumPREs;
889 Changed = true;
890 }
891 }
892 }
893 return Changed;
894}
895
896// This simple PRE (partial redundancy elimination) pass doesn't actually
897// eliminate partial redundancy but transforms it to full redundancy,
898// anticipating that the next CSE step will eliminate this created redundancy.
899// If CSE doesn't eliminate this, than created instruction will remain dead
900// and eliminated later by Remove Dead Machine Instructions pass.
901bool MachineCSEImpl::PerformSimplePRE(MachineDominatorTree *DT) {
903
904 PREMap.clear();
905 bool Changed = false;
906 BBs.push_back(DT->getRootNode());
907 do {
908 auto Node = BBs.pop_back_val();
909 append_range(BBs, Node->children());
910
911 MachineBasicBlock *MBB = Node->getBlock();
912 Changed |= ProcessBlockPRE(DT, MBB);
913
914 } while (!BBs.empty());
915
916 return Changed;
917}
918
919bool MachineCSEImpl::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
920 MachineBasicBlock *MBB,
921 MachineBasicBlock *MBB1) {
922 if (CandidateBB->getParent()->getFunction().hasMinSize())
923 return true;
924 assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
925 assert(DT->dominates(CandidateBB, MBB1) &&
926 "CandidateBB should dominate MBB1");
927 return MBFI->getBlockFreq(CandidateBB) <=
928 MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
929}
930
931void MachineCSEImpl::releaseMemory() {
932 ScopeMap.clear();
933 PREMap.clear();
934 Exps.clear();
935}
936
937bool MachineCSEImpl::run(MachineFunction &MF) {
940 MRI = &MF.getRegInfo();
941 LookAheadLimit = TII->getMachineCSELookAheadLimit();
942 bool ChangedPRE, ChangedCSE;
943 ChangedPRE = PerformSimplePRE(DT);
944 ChangedCSE = PerformCSE(DT->getRootNode());
945 releaseMemory();
946 return ChangedPRE || ChangedCSE;
947}
948
951 MFPropsModifier _(*this, MF);
952
956 MachineCSEImpl Impl(&MDT, &MBFI);
957 bool Changed = Impl.run(MF);
958 if (!Changed)
959 return PreservedAnalyses::all();
960
962 PA.preserve<MachineLoopAnalysis>();
963 PA.preserve<MachineDominatorTreeAnalysis>();
964 PA.preserve<MachineBlockFrequencyAnalysis>();
965 PA.preserveSet<CFGAnalyses>();
966 return PA;
967}
968
969bool MachineCSELegacy::runOnMachineFunction(MachineFunction &MF) {
970 if (skipFunction(MF.getFunction()))
971 return false;
972
974 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
976 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
977 MachineCSEImpl Impl(&MDT, &MBFI);
978 return Impl.run(MF);
979}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, const MachineOperand &MO, const MachineFunction &MF, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII)
static cl::opt< int > CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024), cl::desc("Threshold for the size of CSUses"))
static cl::opt< bool > AggressiveMachineCSE("aggressive-machine-cse", cl::Hidden, cl::init(false), cl::desc("Override the profitability heuristics for Machine CSE"))
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:709
bool isAsCheapAsAMove(const MachineInstr &MI) const override
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
An RAII based helper class to modify MachineFunctionProperties when running pass.
MachineInstrBundleIterator< const MachineInstr > const_iterator
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
LLVM_ABI BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const MachineOperand & getOperand(unsigned i) const
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
size_type count(const K &Key) const
Return 1 if the specified key is in the table, 0 otherwise.
void insert(const K &Key, const V &Val)
V lookup(const K &Key) const
ScopedHashTableScope< MachineInstr *, unsigned, MachineInstrExpressionTrait, AllocatorTy > ScopeTy
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool empty() const
Definition SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Changed
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2544
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
LLVM_ABI char & MachineCSELegacyID
MachineCSE - This pass performs global CSE on machine instructions.
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
LLVM_ABI bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition CFG.cpp:282