LLVM  14.0.0git
PeepholeOptimizer.cpp
Go to the documentation of this file.
1 //===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Perform peephole optimizations on the machine code:
10 //
11 // - Optimize Extensions
12 //
13 // Optimization of sign / zero extension instructions. It may be extended to
14 // handle other instructions with similar properties.
15 //
16 // On some targets, some instructions, e.g. X86 sign / zero extension, may
17 // leave the source value in the lower part of the result. This optimization
18 // will replace some uses of the pre-extension value with uses of the
19 // sub-register of the results.
20 //
21 // - Optimize Comparisons
22 //
23 // Optimization of comparison instructions. For instance, in this code:
24 //
25 // sub r1, 1
26 // cmp r1, 0
27 // bz L1
28 //
29 // If the "sub" instruction all ready sets (or could be modified to set) the
30 // same flag that the "cmp" instruction sets and that "bz" uses, then we can
31 // eliminate the "cmp" instruction.
32 //
33 // Another instance, in this code:
34 //
35 // sub r1, r3 | sub r1, imm
36 // cmp r3, r1 or cmp r1, r3 | cmp r1, imm
37 // bge L1
38 //
39 // If the branch instruction can use flag from "sub", then we can replace
40 // "sub" with "subs" and eliminate the "cmp" instruction.
41 //
42 // - Optimize Loads:
43 //
44 // Loads that can be folded into a later instruction. A load is foldable
45 // if it loads to virtual registers and the virtual register defined has
46 // a single use.
47 //
48 // - Optimize Copies and Bitcast (more generally, target specific copies):
49 //
50 // Rewrite copies and bitcasts to avoid cross register bank copies
51 // when possible.
52 // E.g., Consider the following example, where capital and lower
53 // letters denote different register file:
54 // b = copy A <-- cross-bank copy
55 // C = copy b <-- cross-bank copy
56 // =>
57 // b = copy A <-- cross-bank copy
58 // C = copy A <-- same-bank copy
59 //
60 // E.g., for bitcast:
61 // b = bitcast A <-- cross-bank copy
62 // C = bitcast b <-- cross-bank copy
63 // =>
64 // b = bitcast A <-- cross-bank copy
65 // C = copy A <-- same-bank copy
66 //===----------------------------------------------------------------------===//
67 
68 #include "llvm/ADT/DenseMap.h"
69 #include "llvm/ADT/Optional.h"
70 #include "llvm/ADT/SmallPtrSet.h"
71 #include "llvm/ADT/SmallSet.h"
72 #include "llvm/ADT/SmallVector.h"
73 #include "llvm/ADT/Statistic.h"
87 #include "llvm/InitializePasses.h"
88 #include "llvm/MC/LaneBitmask.h"
89 #include "llvm/MC/MCInstrDesc.h"
90 #include "llvm/Pass.h"
92 #include "llvm/Support/Debug.h"
95 #include <cassert>
96 #include <cstdint>
97 #include <memory>
98 #include <utility>
99 
100 using namespace llvm;
103 
104 #define DEBUG_TYPE "peephole-opt"
105 
106 // Optimize Extensions
107 static cl::opt<bool>
108 Aggressive("aggressive-ext-opt", cl::Hidden,
109  cl::desc("Aggressive extension optimization"));
110 
111 static cl::opt<bool>
112 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
113  cl::desc("Disable the peephole optimizer"));
114 
115 /// Specifiy whether or not the value tracking looks through
116 /// complex instructions. When this is true, the value tracker
117 /// bails on everything that is not a copy or a bitcast.
118 static cl::opt<bool>
119 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
120  cl::desc("Disable advanced copy optimization"));
121 
123  "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
124  cl::desc("Disable non-allocatable physical register copy optimization"));
125 
126 // Limit the number of PHI instructions to process
127 // in PeepholeOptimizer::getNextSource.
129  "rewrite-phi-limit", cl::Hidden, cl::init(10),
130  cl::desc("Limit the length of PHI chains to lookup"));
131 
132 // Limit the length of recurrence chain when evaluating the benefit of
133 // commuting operands.
135  "recurrence-chain-limit", cl::Hidden, cl::init(3),
136  cl::desc("Maximum length of recurrence chain when evaluating the benefit "
137  "of commuting operands"));
138 
139 
140 STATISTIC(NumReuse, "Number of extension results reused");
141 STATISTIC(NumCmps, "Number of compares eliminated");
142 STATISTIC(NumImmFold, "Number of move immediate folded");
143 STATISTIC(NumLoadFold, "Number of loads folded");
144 STATISTIC(NumSelects, "Number of selects optimized");
145 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
146 STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
147 STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
148 
149 namespace {
150 
151  class ValueTrackerResult;
152  class RecurrenceInstr;
153 
154  class PeepholeOptimizer : public MachineFunctionPass {
155  const TargetInstrInfo *TII;
156  const TargetRegisterInfo *TRI;
158  MachineDominatorTree *DT; // Machine dominator tree
159  MachineLoopInfo *MLI;
160 
161  public:
162  static char ID; // Pass identification
163 
164  PeepholeOptimizer() : MachineFunctionPass(ID) {
166  }
167 
168  bool runOnMachineFunction(MachineFunction &MF) override;
169 
170  void getAnalysisUsage(AnalysisUsage &AU) const override {
171  AU.setPreservesCFG();
175  if (Aggressive) {
178  }
179  }
180 
181  MachineFunctionProperties getRequiredProperties() const override {
184  }
185 
186  /// Track Def -> Use info used for rewriting copies.
188 
189  /// Sequence of instructions that formulate recurrence cycle.
190  using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
191 
192  private:
193  bool optimizeCmpInstr(MachineInstr &MI);
194  bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
199  bool optimizeCoalescableCopy(MachineInstr &MI);
200  bool optimizeUncoalescableCopy(MachineInstr &MI,
202  bool optimizeRecurrence(MachineInstr &PHI);
203  bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
204  bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
206  bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
208 
209  /// Finds recurrence cycles, but only ones that formulated around
210  /// a def operand and a use operand that are tied. If there is a use
211  /// operand commutable with the tied use operand, find recurrence cycle
212  /// along that operand as well.
213  bool findTargetRecurrence(Register Reg,
214  const SmallSet<Register, 2> &TargetReg,
215  RecurrenceCycle &RC);
216 
217  /// If copy instruction \p MI is a virtual register copy, track it in
218  /// the set \p CopyMIs. If this virtual register was previously seen as a
219  /// copy, replace the uses of this copy with the previously seen copy's
220  /// destination register.
221  bool foldRedundantCopy(MachineInstr &MI,
223 
224  /// Is the register \p Reg a non-allocatable physical register?
225  bool isNAPhysCopy(Register Reg);
226 
227  /// If copy instruction \p MI is a non-allocatable virtual<->physical
228  /// register copy, track it in the \p NAPhysToVirtMIs map. If this
229  /// non-allocatable physical register was previously copied to a virtual
230  /// registered and hasn't been clobbered, the virt->phys copy can be
231  /// deleted.
232  bool foldRedundantNAPhysCopy(
234 
235  bool isLoadFoldable(MachineInstr &MI,
236  SmallSet<Register, 16> &FoldAsLoadDefCandidates);
237 
238  /// Check whether \p MI is understood by the register coalescer
239  /// but may require some rewriting.
240  bool isCoalescableCopy(const MachineInstr &MI) {
241  // SubregToRegs are not interesting, because they are already register
242  // coalescer friendly.
243  return MI.isCopy() || (!DisableAdvCopyOpt &&
244  (MI.isRegSequence() || MI.isInsertSubreg() ||
245  MI.isExtractSubreg()));
246  }
247 
248  /// Check whether \p MI is a copy like instruction that is
249  /// not recognized by the register coalescer.
250  bool isUncoalescableCopy(const MachineInstr &MI) {
251  return MI.isBitcast() ||
252  (!DisableAdvCopyOpt &&
253  (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
254  MI.isExtractSubregLike()));
255  }
256 
257  MachineInstr &rewriteSource(MachineInstr &CopyLike,
258  RegSubRegPair Def, RewriteMapTy &RewriteMap);
259  };
260 
261  /// Helper class to hold instructions that are inside recurrence cycles.
262  /// The recurrence cycle is formulated around 1) a def operand and its
263  /// tied use operand, or 2) a def operand and a use operand that is commutable
264  /// with another use operand which is tied to the def operand. In the latter
265  /// case, index of the tied use operand and the commutable use operand are
266  /// maintained with CommutePair.
267  class RecurrenceInstr {
268  public:
269  using IndexPair = std::pair<unsigned, unsigned>;
270 
271  RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
272  RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
273  : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
274 
275  MachineInstr *getMI() const { return MI; }
276  Optional<IndexPair> getCommutePair() const { return CommutePair; }
277 
278  private:
279  MachineInstr *MI;
280  Optional<IndexPair> CommutePair;
281  };
282 
283  /// Helper class to hold a reply for ValueTracker queries.
284  /// Contains the returned sources for a given search and the instructions
285  /// where the sources were tracked from.
286  class ValueTrackerResult {
287  private:
288  /// Track all sources found by one ValueTracker query.
290 
291  /// Instruction using the sources in 'RegSrcs'.
292  const MachineInstr *Inst = nullptr;
293 
294  public:
295  ValueTrackerResult() = default;
296 
297  ValueTrackerResult(Register Reg, unsigned SubReg) {
298  addSource(Reg, SubReg);
299  }
300 
301  bool isValid() const { return getNumSources() > 0; }
302 
303  void setInst(const MachineInstr *I) { Inst = I; }
304  const MachineInstr *getInst() const { return Inst; }
305 
306  void clear() {
307  RegSrcs.clear();
308  Inst = nullptr;
309  }
310 
311  void addSource(Register SrcReg, unsigned SrcSubReg) {
312  RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
313  }
314 
315  void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
316  assert(Idx < getNumSources() && "Reg pair source out of index");
317  RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
318  }
319 
320  int getNumSources() const { return RegSrcs.size(); }
321 
322  RegSubRegPair getSrc(int Idx) const {
323  return RegSrcs[Idx];
324  }
325 
326  Register getSrcReg(int Idx) const {
327  assert(Idx < getNumSources() && "Reg source out of index");
328  return RegSrcs[Idx].Reg;
329  }
330 
331  unsigned getSrcSubReg(int Idx) const {
332  assert(Idx < getNumSources() && "SubReg source out of index");
333  return RegSrcs[Idx].SubReg;
334  }
335 
336  bool operator==(const ValueTrackerResult &Other) const {
337  if (Other.getInst() != getInst())
338  return false;
339 
340  if (Other.getNumSources() != getNumSources())
341  return false;
342 
343  for (int i = 0, e = Other.getNumSources(); i != e; ++i)
344  if (Other.getSrcReg(i) != getSrcReg(i) ||
345  Other.getSrcSubReg(i) != getSrcSubReg(i))
346  return false;
347  return true;
348  }
349  };
350 
351  /// Helper class to track the possible sources of a value defined by
352  /// a (chain of) copy related instructions.
353  /// Given a definition (instruction and definition index), this class
354  /// follows the use-def chain to find successive suitable sources.
355  /// The given source can be used to rewrite the definition into
356  /// def = COPY src.
357  ///
358  /// For instance, let us consider the following snippet:
359  /// v0 =
360  /// v2 = INSERT_SUBREG v1, v0, sub0
361  /// def = COPY v2.sub0
362  ///
363  /// Using a ValueTracker for def = COPY v2.sub0 will give the following
364  /// suitable sources:
365  /// v2.sub0 and v0.
366  /// Then, def can be rewritten into def = COPY v0.
367  class ValueTracker {
368  private:
369  /// The current point into the use-def chain.
370  const MachineInstr *Def = nullptr;
371 
372  /// The index of the definition in Def.
373  unsigned DefIdx = 0;
374 
375  /// The sub register index of the definition.
376  unsigned DefSubReg;
377 
378  /// The register where the value can be found.
379  Register Reg;
380 
381  /// MachineRegisterInfo used to perform tracking.
382  const MachineRegisterInfo &MRI;
383 
384  /// Optional TargetInstrInfo used to perform some complex tracking.
385  const TargetInstrInfo *TII;
386 
387  /// Dispatcher to the right underlying implementation of getNextSource.
388  ValueTrackerResult getNextSourceImpl();
389 
390  /// Specialized version of getNextSource for Copy instructions.
391  ValueTrackerResult getNextSourceFromCopy();
392 
393  /// Specialized version of getNextSource for Bitcast instructions.
394  ValueTrackerResult getNextSourceFromBitcast();
395 
396  /// Specialized version of getNextSource for RegSequence instructions.
397  ValueTrackerResult getNextSourceFromRegSequence();
398 
399  /// Specialized version of getNextSource for InsertSubreg instructions.
400  ValueTrackerResult getNextSourceFromInsertSubreg();
401 
402  /// Specialized version of getNextSource for ExtractSubreg instructions.
403  ValueTrackerResult getNextSourceFromExtractSubreg();
404 
405  /// Specialized version of getNextSource for SubregToReg instructions.
406  ValueTrackerResult getNextSourceFromSubregToReg();
407 
408  /// Specialized version of getNextSource for PHI instructions.
409  ValueTrackerResult getNextSourceFromPHI();
410 
411  public:
412  /// Create a ValueTracker instance for the value defined by \p Reg.
413  /// \p DefSubReg represents the sub register index the value tracker will
414  /// track. It does not need to match the sub register index used in the
415  /// definition of \p Reg.
416  /// If \p Reg is a physical register, a value tracker constructed with
417  /// this constructor will not find any alternative source.
418  /// Indeed, when \p Reg is a physical register that constructor does not
419  /// know which definition of \p Reg it should track.
420  /// Use the next constructor to track a physical register.
421  ValueTracker(Register Reg, unsigned DefSubReg,
422  const MachineRegisterInfo &MRI,
423  const TargetInstrInfo *TII = nullptr)
424  : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
425  if (!Reg.isPhysical()) {
426  Def = MRI.getVRegDef(Reg);
427  DefIdx = MRI.def_begin(Reg).getOperandNo();
428  }
429  }
430 
431  /// Following the use-def chain, get the next available source
432  /// for the tracked value.
433  /// \return A ValueTrackerResult containing a set of registers
434  /// and sub registers with tracked values. A ValueTrackerResult with
435  /// an empty set of registers means no source was found.
436  ValueTrackerResult getNextSource();
437  };
438 
439 } // end anonymous namespace
440 
441 char PeepholeOptimizer::ID = 0;
442 
444 
445 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
446  "Peephole Optimizations", false, false)
449 INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
450  "Peephole Optimizations", false, false)
451 
452 /// If instruction is a copy-like instruction, i.e. it reads a single register
453 /// and writes a single register and it does not modify the source, and if the
454 /// source value is preserved as a sub-register of the result, then replace all
455 /// reachable uses of the source with the subreg of the result.
456 ///
457 /// Do not generate an EXTRACT that is used only in a debug use, as this changes
458 /// the code. Since this code does not currently share EXTRACTs, just ignore all
459 /// debug uses.
460 bool PeepholeOptimizer::
461 optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
462  SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
463  Register SrcReg, DstReg;
464  unsigned SubIdx;
465  if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
466  return false;
467 
468  if (DstReg.isPhysical() || SrcReg.isPhysical())
469  return false;
470 
471  if (MRI->hasOneNonDBGUse(SrcReg))
472  // No other uses.
473  return false;
474 
475  // Ensure DstReg can get a register class that actually supports
476  // sub-registers. Don't change the class until we commit.
477  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
478  DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
479  if (!DstRC)
480  return false;
481 
482  // The ext instr may be operating on a sub-register of SrcReg as well.
483  // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
484  // register.
485  // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
486  // SrcReg:SubIdx should be replaced.
487  bool UseSrcSubIdx =
488  TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
489 
490  // The source has other uses. See if we can replace the other uses with use of
491  // the result of the extension.
493  for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
494  ReachedBBs.insert(UI.getParent());
495 
496  // Uses that are in the same BB of uses of the result of the instruction.
498 
499  // Uses that the result of the instruction can reach.
500  SmallVector<MachineOperand*, 8> ExtendedUses;
501 
502  bool ExtendLife = true;
503  for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
504  MachineInstr *UseMI = UseMO.getParent();
505  if (UseMI == &MI)
506  continue;
507 
508  if (UseMI->isPHI()) {
509  ExtendLife = false;
510  continue;
511  }
512 
513  // Only accept uses of SrcReg:SubIdx.
514  if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
515  continue;
516 
517  // It's an error to translate this:
518  //
519  // %reg1025 = <sext> %reg1024
520  // ...
521  // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
522  //
523  // into this:
524  //
525  // %reg1025 = <sext> %reg1024
526  // ...
527  // %reg1027 = COPY %reg1025:4
528  // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
529  //
530  // The problem here is that SUBREG_TO_REG is there to assert that an
531  // implicit zext occurs. It doesn't insert a zext instruction. If we allow
532  // the COPY here, it will give us the value after the <sext>, not the
533  // original value of %reg1024 before <sext>.
534  if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
535  continue;
536 
537  MachineBasicBlock *UseMBB = UseMI->getParent();
538  if (UseMBB == &MBB) {
539  // Local uses that come after the extension.
540  if (!LocalMIs.count(UseMI))
541  Uses.push_back(&UseMO);
542  } else if (ReachedBBs.count(UseMBB)) {
543  // Non-local uses where the result of the extension is used. Always
544  // replace these unless it's a PHI.
545  Uses.push_back(&UseMO);
546  } else if (Aggressive && DT->dominates(&MBB, UseMBB)) {
547  // We may want to extend the live range of the extension result in order
548  // to replace these uses.
549  ExtendedUses.push_back(&UseMO);
550  } else {
551  // Both will be live out of the def MBB anyway. Don't extend live range of
552  // the extension result.
553  ExtendLife = false;
554  break;
555  }
556  }
557 
558  if (ExtendLife && !ExtendedUses.empty())
559  // Extend the liveness of the extension result.
560  Uses.append(ExtendedUses.begin(), ExtendedUses.end());
561 
562  // Now replace all uses.
563  bool Changed = false;
564  if (!Uses.empty()) {
566 
567  // Look for PHI uses of the extended result, we don't want to extend the
568  // liveness of a PHI input. It breaks all kinds of assumptions down
569  // stream. A PHI use is expected to be the kill of its source values.
570  for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
571  if (UI.isPHI())
572  PHIBBs.insert(UI.getParent());
573 
574  const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
575  for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
576  MachineOperand *UseMO = Uses[i];
577  MachineInstr *UseMI = UseMO->getParent();
578  MachineBasicBlock *UseMBB = UseMI->getParent();
579  if (PHIBBs.count(UseMBB))
580  continue;
581 
582  // About to add uses of DstReg, clear DstReg's kill flags.
583  if (!Changed) {
584  MRI->clearKillFlags(DstReg);
585  MRI->constrainRegClass(DstReg, DstRC);
586  }
587 
588  // SubReg defs are illegal in machine SSA phase,
589  // we should not generate SubReg defs.
590  //
591  // For example, for the instructions:
592  //
593  // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
594  // %3:gprc_and_gprc_nor0 = COPY %0.sub_32:g8rc
595  //
596  // We should generate:
597  //
598  // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
599  // %6:gprc_and_gprc_nor0 = COPY %1.sub_32:g8rc_and_g8rc_nox0
600  // %3:gprc_and_gprc_nor0 = COPY %6:gprc_and_gprc_nor0
601  //
602  if (UseSrcSubIdx)
603  RC = MRI->getRegClass(UseMI->getOperand(0).getReg());
604 
605  Register NewVR = MRI->createVirtualRegister(RC);
606  BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
607  TII->get(TargetOpcode::COPY), NewVR)
608  .addReg(DstReg, 0, SubIdx);
609  if (UseSrcSubIdx)
610  UseMO->setSubReg(0);
611 
612  UseMO->setReg(NewVR);
613  ++NumReuse;
614  Changed = true;
615  }
616  }
617 
618  return Changed;
619 }
620 
621 /// If the instruction is a compare and the previous instruction it's comparing
622 /// against already sets (or could be modified to set) the same flag as the
623 /// compare, then we can remove the comparison and use the flag from the
624 /// previous instruction.
625 bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
626  // If this instruction is a comparison against zero and isn't comparing a
627  // physical register, we can try to optimize it.
628  Register SrcReg, SrcReg2;
629  int64_t CmpMask, CmpValue;
630  if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
631  SrcReg.isPhysical() || SrcReg2.isPhysical())
632  return false;
633 
634  // Attempt to optimize the comparison instruction.
635  LLVM_DEBUG(dbgs() << "Attempting to optimize compare: " << MI);
636  if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
637  LLVM_DEBUG(dbgs() << " -> Successfully optimized compare!\n");
638  ++NumCmps;
639  return true;
640  }
641 
642  return false;
643 }
644 
645 /// Optimize a select instruction.
648  unsigned TrueOp = 0;
649  unsigned FalseOp = 0;
650  bool Optimizable = false;
652  if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
653  return false;
654  if (!Optimizable)
655  return false;
656  if (!TII->optimizeSelect(MI, LocalMIs))
657  return false;
658  LLVM_DEBUG(dbgs() << "Deleting select: " << MI);
659  MI.eraseFromParent();
660  ++NumSelects;
661  return true;
662 }
663 
664 /// Check if a simpler conditional branch can be generated.
666  return TII->optimizeCondBranch(MI);
667 }
668 
669 /// Try to find the next source that share the same register file
670 /// for the value defined by \p Reg and \p SubReg.
671 /// When true is returned, the \p RewriteMap can be used by the client to
672 /// retrieve all Def -> Use along the way up to the next source. Any found
673 /// Use that is not itself a key for another entry, is the next source to
674 /// use. During the search for the next source, multiple sources can be found
675 /// given multiple incoming sources of a PHI instruction. In this case, we
676 /// look in each PHI source for the next source; all found next sources must
677 /// share the same register file as \p Reg and \p SubReg. The client should
678 /// then be capable to rewrite all intermediate PHIs to get the next source.
679 /// \return False if no alternative sources are available. True otherwise.
680 bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
681  RewriteMapTy &RewriteMap) {
682  // Do not try to find a new source for a physical register.
683  // So far we do not have any motivating example for doing that.
684  // Thus, instead of maintaining untested code, we will revisit that if
685  // that changes at some point.
686  Register Reg = RegSubReg.Reg;
687  if (Reg.isPhysical())
688  return false;
689  const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
690 
692  RegSubRegPair CurSrcPair = RegSubReg;
693  SrcToLook.push_back(CurSrcPair);
694 
695  unsigned PHICount = 0;
696  do {
697  CurSrcPair = SrcToLook.pop_back_val();
698  // As explained above, do not handle physical registers
699  if (Register::isPhysicalRegister(CurSrcPair.Reg))
700  return false;
701 
702  ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
703 
704  // Follow the chain of copies until we find a more suitable source, a phi
705  // or have to abort.
706  while (true) {
707  ValueTrackerResult Res = ValTracker.getNextSource();
708  // Abort at the end of a chain (without finding a suitable source).
709  if (!Res.isValid())
710  return false;
711 
712  // Insert the Def -> Use entry for the recently found source.
713  ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
714  if (CurSrcRes.isValid()) {
715  assert(CurSrcRes == Res && "ValueTrackerResult found must match");
716  // An existent entry with multiple sources is a PHI cycle we must avoid.
717  // Otherwise it's an entry with a valid next source we already found.
718  if (CurSrcRes.getNumSources() > 1) {
719  LLVM_DEBUG(dbgs()
720  << "findNextSource: found PHI cycle, aborting...\n");
721  return false;
722  }
723  break;
724  }
725  RewriteMap.insert(std::make_pair(CurSrcPair, Res));
726 
727  // ValueTrackerResult usually have one source unless it's the result from
728  // a PHI instruction. Add the found PHI edges to be looked up further.
729  unsigned NumSrcs = Res.getNumSources();
730  if (NumSrcs > 1) {
731  PHICount++;
732  if (PHICount >= RewritePHILimit) {
733  LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
734  return false;
735  }
736 
737  for (unsigned i = 0; i < NumSrcs; ++i)
738  SrcToLook.push_back(Res.getSrc(i));
739  break;
740  }
741 
742  CurSrcPair = Res.getSrc(0);
743  // Do not extend the live-ranges of physical registers as they add
744  // constraints to the register allocator. Moreover, if we want to extend
745  // the live-range of a physical register, unlike SSA virtual register,
746  // we will have to check that they aren't redefine before the related use.
747  if (Register::isPhysicalRegister(CurSrcPair.Reg))
748  return false;
749 
750  // Keep following the chain if the value isn't any better yet.
751  const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
752  if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC,
753  CurSrcPair.SubReg))
754  continue;
755 
756  // We currently cannot deal with subreg operands on PHI instructions
757  // (see insertPHI()).
758  if (PHICount > 0 && CurSrcPair.SubReg != 0)
759  continue;
760 
761  // We found a suitable source, and are done with this chain.
762  break;
763  }
764  } while (!SrcToLook.empty());
765 
766  // If we did not find a more suitable source, there is nothing to optimize.
767  return CurSrcPair.Reg != Reg;
768 }
769 
770 /// Insert a PHI instruction with incoming edges \p SrcRegs that are
771 /// guaranteed to have the same register class. This is necessary whenever we
772 /// successfully traverse a PHI instruction and find suitable sources coming
773 /// from its edges. By inserting a new PHI, we provide a rewritten PHI def
774 /// suitable to be used in a new COPY instruction.
775 static MachineInstr &
777  const SmallVectorImpl<RegSubRegPair> &SrcRegs,
778  MachineInstr &OrigPHI) {
779  assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
780 
781  const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
782  // NewRC is only correct if no subregisters are involved. findNextSource()
783  // should have rejected those cases already.
784  assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
785  Register NewVR = MRI.createVirtualRegister(NewRC);
786  MachineBasicBlock *MBB = OrigPHI.getParent();
787  MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
788  TII.get(TargetOpcode::PHI), NewVR);
789 
790  unsigned MBBOpIdx = 2;
791  for (const RegSubRegPair &RegPair : SrcRegs) {
792  MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
793  MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
794  // Since we're extended the lifetime of RegPair.Reg, clear the
795  // kill flags to account for that and make RegPair.Reg reaches
796  // the new PHI.
797  MRI.clearKillFlags(RegPair.Reg);
798  MBBOpIdx += 2;
799  }
800 
801  return *MIB;
802 }
803 
804 namespace {
805 
806 /// Interface to query instructions amenable to copy rewriting.
807 class Rewriter {
808 protected:
809  MachineInstr &CopyLike;
810  unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten.
811 public:
812  Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
813  virtual ~Rewriter() {}
814 
815  /// Get the next rewritable source (SrcReg, SrcSubReg) and
816  /// the related value that it affects (DstReg, DstSubReg).
817  /// A source is considered rewritable if its register class and the
818  /// register class of the related DstReg may not be register
819  /// coalescer friendly. In other words, given a copy-like instruction
820  /// not all the arguments may be returned at rewritable source, since
821  /// some arguments are none to be register coalescer friendly.
822  ///
823  /// Each call of this method moves the current source to the next
824  /// rewritable source.
825  /// For instance, let CopyLike be the instruction to rewrite.
826  /// CopyLike has one definition and one source:
827  /// dst.dstSubIdx = CopyLike src.srcSubIdx.
828  ///
829  /// The first call will give the first rewritable source, i.e.,
830  /// the only source this instruction has:
831  /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
832  /// This source defines the whole definition, i.e.,
833  /// (DstReg, DstSubReg) = (dst, dstSubIdx).
834  ///
835  /// The second and subsequent calls will return false, as there is only one
836  /// rewritable source.
837  ///
838  /// \return True if a rewritable source has been found, false otherwise.
839  /// The output arguments are valid if and only if true is returned.
840  virtual bool getNextRewritableSource(RegSubRegPair &Src,
841  RegSubRegPair &Dst) = 0;
842 
843  /// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
844  /// \return True if the rewriting was possible, false otherwise.
845  virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
846 };
847 
848 /// Rewriter for COPY instructions.
849 class CopyRewriter : public Rewriter {
850 public:
851  CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
852  assert(MI.isCopy() && "Expected copy instruction");
853  }
854  virtual ~CopyRewriter() = default;
855 
856  bool getNextRewritableSource(RegSubRegPair &Src,
857  RegSubRegPair &Dst) override {
858  // CurrentSrcIdx > 0 means this function has already been called.
859  if (CurrentSrcIdx > 0)
860  return false;
861  // This is the first call to getNextRewritableSource.
862  // Move the CurrentSrcIdx to remember that we made that call.
863  CurrentSrcIdx = 1;
864  // The rewritable source is the argument.
865  const MachineOperand &MOSrc = CopyLike.getOperand(1);
866  Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
867  // What we track are the alternative sources of the definition.
868  const MachineOperand &MODef = CopyLike.getOperand(0);
869  Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
870  return true;
871  }
872 
873  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
874  if (CurrentSrcIdx != 1)
875  return false;
876  MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
877  MOSrc.setReg(NewReg);
878  MOSrc.setSubReg(NewSubReg);
879  return true;
880  }
881 };
882 
883 /// Helper class to rewrite uncoalescable copy like instructions
884 /// into new COPY (coalescable friendly) instructions.
885 class UncoalescableRewriter : public Rewriter {
886  unsigned NumDefs; ///< Number of defs in the bitcast.
887 
888 public:
889  UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
890  NumDefs = MI.getDesc().getNumDefs();
891  }
892 
893  /// \see See Rewriter::getNextRewritableSource()
894  /// All such sources need to be considered rewritable in order to
895  /// rewrite a uncoalescable copy-like instruction. This method return
896  /// each definition that must be checked if rewritable.
897  bool getNextRewritableSource(RegSubRegPair &Src,
898  RegSubRegPair &Dst) override {
899  // Find the next non-dead definition and continue from there.
900  if (CurrentSrcIdx == NumDefs)
901  return false;
902 
903  while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
904  ++CurrentSrcIdx;
905  if (CurrentSrcIdx == NumDefs)
906  return false;
907  }
908 
909  // What we track are the alternative sources of the definition.
910  Src = RegSubRegPair(0, 0);
911  const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
912  Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
913 
914  CurrentSrcIdx++;
915  return true;
916  }
917 
918  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
919  return false;
920  }
921 };
922 
923 /// Specialized rewriter for INSERT_SUBREG instruction.
924 class InsertSubregRewriter : public Rewriter {
925 public:
926  InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
927  assert(MI.isInsertSubreg() && "Invalid instruction");
928  }
929 
930  /// \see See Rewriter::getNextRewritableSource()
931  /// Here CopyLike has the following form:
932  /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
933  /// Src1 has the same register class has dst, hence, there is
934  /// nothing to rewrite.
935  /// Src2.src2SubIdx, may not be register coalescer friendly.
936  /// Therefore, the first call to this method returns:
937  /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
938  /// (DstReg, DstSubReg) = (dst, subIdx).
939  ///
940  /// Subsequence calls will return false.
941  bool getNextRewritableSource(RegSubRegPair &Src,
942  RegSubRegPair &Dst) override {
943  // If we already get the only source we can rewrite, return false.
944  if (CurrentSrcIdx == 2)
945  return false;
946  // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
947  CurrentSrcIdx = 2;
948  const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
949  Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
950  const MachineOperand &MODef = CopyLike.getOperand(0);
951 
952  // We want to track something that is compatible with the
953  // partial definition.
954  if (MODef.getSubReg())
955  // Bail if we have to compose sub-register indices.
956  return false;
957  Dst = RegSubRegPair(MODef.getReg(),
958  (unsigned)CopyLike.getOperand(3).getImm());
959  return true;
960  }
961 
962  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
963  if (CurrentSrcIdx != 2)
964  return false;
965  // We are rewriting the inserted reg.
966  MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
967  MO.setReg(NewReg);
968  MO.setSubReg(NewSubReg);
969  return true;
970  }
971 };
972 
973 /// Specialized rewriter for EXTRACT_SUBREG instruction.
974 class ExtractSubregRewriter : public Rewriter {
975  const TargetInstrInfo &TII;
976 
977 public:
978  ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
979  : Rewriter(MI), TII(TII) {
980  assert(MI.isExtractSubreg() && "Invalid instruction");
981  }
982 
983  /// \see Rewriter::getNextRewritableSource()
984  /// Here CopyLike has the following form:
985  /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
986  /// There is only one rewritable source: Src.subIdx,
987  /// which defines dst.dstSubIdx.
988  bool getNextRewritableSource(RegSubRegPair &Src,
989  RegSubRegPair &Dst) override {
990  // If we already get the only source we can rewrite, return false.
991  if (CurrentSrcIdx == 1)
992  return false;
993  // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
994  CurrentSrcIdx = 1;
995  const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
996  // If we have to compose sub-register indices, bail out.
997  if (MOExtractedReg.getSubReg())
998  return false;
999 
1000  Src = RegSubRegPair(MOExtractedReg.getReg(),
1001  CopyLike.getOperand(2).getImm());
1002 
1003  // We want to track something that is compatible with the definition.
1004  const MachineOperand &MODef = CopyLike.getOperand(0);
1005  Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
1006  return true;
1007  }
1008 
1009  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
1010  // The only source we can rewrite is the input register.
1011  if (CurrentSrcIdx != 1)
1012  return false;
1013 
1014  CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
1015 
1016  // If we find a source that does not require to extract something,
1017  // rewrite the operation with a copy.
1018  if (!NewSubReg) {
1019  // Move the current index to an invalid position.
1020  // We do not want another call to this method to be able
1021  // to do any change.
1022  CurrentSrcIdx = -1;
1023  // Rewrite the operation as a COPY.
1024  // Get rid of the sub-register index.
1025  CopyLike.RemoveOperand(2);
1026  // Morph the operation into a COPY.
1027  CopyLike.setDesc(TII.get(TargetOpcode::COPY));
1028  return true;
1029  }
1030  CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
1031  return true;
1032  }
1033 };
1034 
1035 /// Specialized rewriter for REG_SEQUENCE instruction.
1036 class RegSequenceRewriter : public Rewriter {
1037 public:
1038  RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
1039  assert(MI.isRegSequence() && "Invalid instruction");
1040  }
1041 
1042  /// \see Rewriter::getNextRewritableSource()
1043  /// Here CopyLike has the following form:
1044  /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
1045  /// Each call will return a different source, walking all the available
1046  /// source.
1047  ///
1048  /// The first call returns:
1049  /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
1050  /// (DstReg, DstSubReg) = (dst, subIdx1).
1051  ///
1052  /// The second call returns:
1053  /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
1054  /// (DstReg, DstSubReg) = (dst, subIdx2).
1055  ///
1056  /// And so on, until all the sources have been traversed, then
1057  /// it returns false.
1058  bool getNextRewritableSource(RegSubRegPair &Src,
1059  RegSubRegPair &Dst) override {
1060  // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
1061 
1062  // If this is the first call, move to the first argument.
1063  if (CurrentSrcIdx == 0) {
1064  CurrentSrcIdx = 1;
1065  } else {
1066  // Otherwise, move to the next argument and check that it is valid.
1067  CurrentSrcIdx += 2;
1068  if (CurrentSrcIdx >= CopyLike.getNumOperands())
1069  return false;
1070  }
1071  const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
1072  Src.Reg = MOInsertedReg.getReg();
1073  // If we have to compose sub-register indices, bail out.
1074  if ((Src.SubReg = MOInsertedReg.getSubReg()))
1075  return false;
1076 
1077  // We want to track something that is compatible with the related
1078  // partial definition.
1079  Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
1080 
1081  const MachineOperand &MODef = CopyLike.getOperand(0);
1082  Dst.Reg = MODef.getReg();
1083  // If we have to compose sub-registers, bail.
1084  return MODef.getSubReg() == 0;
1085  }
1086 
1087  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
1088  // We cannot rewrite out of bound operands.
1089  // Moreover, rewritable sources are at odd positions.
1090  if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
1091  return false;
1092 
1093  MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
1094  MO.setReg(NewReg);
1095  MO.setSubReg(NewSubReg);
1096  return true;
1097  }
1098 };
1099 
1100 } // end anonymous namespace
1101 
1102 /// Get the appropriated Rewriter for \p MI.
1103 /// \return A pointer to a dynamically allocated Rewriter or nullptr if no
1104 /// rewriter works for \p MI.
1106  // Handle uncoalescable copy-like instructions.
1107  if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
1108  MI.isExtractSubregLike())
1109  return new UncoalescableRewriter(MI);
1110 
1111  switch (MI.getOpcode()) {
1112  default:
1113  return nullptr;
1114  case TargetOpcode::COPY:
1115  return new CopyRewriter(MI);
1116  case TargetOpcode::INSERT_SUBREG:
1117  return new InsertSubregRewriter(MI);
1118  case TargetOpcode::EXTRACT_SUBREG:
1119  return new ExtractSubregRewriter(MI, TII);
1120  case TargetOpcode::REG_SEQUENCE:
1121  return new RegSequenceRewriter(MI);
1122  }
1123 }
1124 
1125 /// Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find
1126 /// the new source to use for rewrite. If \p HandleMultipleSources is true and
1127 /// multiple sources for a given \p Def are found along the way, we found a
1128 /// PHI instructions that needs to be rewritten.
1129 /// TODO: HandleMultipleSources should be removed once we test PHI handling
1130 /// with coalescable copies.
1131 static RegSubRegPair
1134  const PeepholeOptimizer::RewriteMapTy &RewriteMap,
1135  bool HandleMultipleSources = true) {
1136  RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
1137  while (true) {
1138  ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
1139  // If there are no entries on the map, LookupSrc is the new source.
1140  if (!Res.isValid())
1141  return LookupSrc;
1142 
1143  // There's only one source for this definition, keep searching...
1144  unsigned NumSrcs = Res.getNumSources();
1145  if (NumSrcs == 1) {
1146  LookupSrc.Reg = Res.getSrcReg(0);
1147  LookupSrc.SubReg = Res.getSrcSubReg(0);
1148  continue;
1149  }
1150 
1151  // TODO: Remove once multiple srcs w/ coalescable copies are supported.
1152  if (!HandleMultipleSources)
1153  break;
1154 
1155  // Multiple sources, recurse into each source to find a new source
1156  // for it. Then, rewrite the PHI accordingly to its new edges.
1157  SmallVector<RegSubRegPair, 4> NewPHISrcs;
1158  for (unsigned i = 0; i < NumSrcs; ++i) {
1159  RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
1160  NewPHISrcs.push_back(
1161  getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
1162  }
1163 
1164  // Build the new PHI node and return its def register as the new source.
1165  MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());
1166  MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI);
1167  LLVM_DEBUG(dbgs() << "-- getNewSource\n");
1168  LLVM_DEBUG(dbgs() << " Replacing: " << OrigPHI);
1169  LLVM_DEBUG(dbgs() << " With: " << NewPHI);
1170  const MachineOperand &MODef = NewPHI.getOperand(0);
1171  return RegSubRegPair(MODef.getReg(), MODef.getSubReg());
1172  }
1173 
1174  return RegSubRegPair(0, 0);
1175 }
1176 
1177 /// Optimize generic copy instructions to avoid cross register bank copy.
1178 /// The optimization looks through a chain of copies and tries to find a source
1179 /// that has a compatible register class.
1180 /// Two register classes are considered to be compatible if they share the same
1181 /// register bank.
1182 /// New copies issued by this optimization are register allocator
1183 /// friendly. This optimization does not remove any copy as it may
1184 /// overconstrain the register allocator, but replaces some operands
1185 /// when possible.
1186 /// \pre isCoalescableCopy(*MI) is true.
1187 /// \return True, when \p MI has been rewritten. False otherwise.
1188 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
1189  assert(isCoalescableCopy(MI) && "Invalid argument");
1190  assert(MI.getDesc().getNumDefs() == 1 &&
1191  "Coalescer can understand multiple defs?!");
1192  const MachineOperand &MODef = MI.getOperand(0);
1193  // Do not rewrite physical definitions.
1194  if (Register::isPhysicalRegister(MODef.getReg()))
1195  return false;
1196 
1197  bool Changed = false;
1198  // Get the right rewriter for the current copy.
1199  std::unique_ptr<Rewriter> CpyRewriter(getCopyRewriter(MI, *TII));
1200  // If none exists, bail out.
1201  if (!CpyRewriter)
1202  return false;
1203  // Rewrite each rewritable source.
1204  RegSubRegPair Src;
1205  RegSubRegPair TrackPair;
1206  while (CpyRewriter->getNextRewritableSource(Src, TrackPair)) {
1207  // Keep track of PHI nodes and its incoming edges when looking for sources.
1208  RewriteMapTy RewriteMap;
1209  // Try to find a more suitable source. If we failed to do so, or get the
1210  // actual source, move to the next source.
1211  if (!findNextSource(TrackPair, RewriteMap))
1212  continue;
1213 
1214  // Get the new source to rewrite. TODO: Only enable handling of multiple
1215  // sources (PHIs) once we have a motivating example and testcases for it.
1216  RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
1217  /*HandleMultipleSources=*/false);
1218  if (Src.Reg == NewSrc.Reg || NewSrc.Reg == 0)
1219  continue;
1220 
1221  // Rewrite source.
1222  if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
1223  // We may have extended the live-range of NewSrc, account for that.
1224  MRI->clearKillFlags(NewSrc.Reg);
1225  Changed = true;
1226  }
1227  }
1228  // TODO: We could have a clean-up method to tidy the instruction.
1229  // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
1230  // => v0 = COPY v1
1231  // Currently we haven't seen motivating example for that and we
1232  // want to avoid untested code.
1233  NumRewrittenCopies += Changed;
1234  return Changed;
1235 }
1236 
1237 /// Rewrite the source found through \p Def, by using the \p RewriteMap
1238 /// and create a new COPY instruction. More info about RewriteMap in
1239 /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
1240 /// Uncoalescable copies, since they are copy like instructions that aren't
1241 /// recognized by the register allocator.
1242 MachineInstr &
1243 PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
1244  RegSubRegPair Def, RewriteMapTy &RewriteMap) {
1246  "We do not rewrite physical registers");
1247 
1248  // Find the new source to use in the COPY rewrite.
1249  RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap);
1250 
1251  // Insert the COPY.
1252  const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
1253  Register NewVReg = MRI->createVirtualRegister(DefRC);
1254 
1255  MachineInstr *NewCopy =
1256  BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
1257  TII->get(TargetOpcode::COPY), NewVReg)
1258  .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
1259 
1260  if (Def.SubReg) {
1261  NewCopy->getOperand(0).setSubReg(Def.SubReg);
1262  NewCopy->getOperand(0).setIsUndef();
1263  }
1264 
1265  LLVM_DEBUG(dbgs() << "-- RewriteSource\n");
1266  LLVM_DEBUG(dbgs() << " Replacing: " << CopyLike);
1267  LLVM_DEBUG(dbgs() << " With: " << *NewCopy);
1268  MRI->replaceRegWith(Def.Reg, NewVReg);
1269  MRI->clearKillFlags(NewVReg);
1270 
1271  // We extended the lifetime of NewSrc.Reg, clear the kill flags to
1272  // account for that.
1273  MRI->clearKillFlags(NewSrc.Reg);
1274 
1275  return *NewCopy;
1276 }
1277 
1278 /// Optimize copy-like instructions to create
1279 /// register coalescer friendly instruction.
1280 /// The optimization tries to kill-off the \p MI by looking
1281 /// through a chain of copies to find a source that has a compatible
1282 /// register class.
1283 /// If such a source is found, it replace \p MI by a generic COPY
1284 /// operation.
1285 /// \pre isUncoalescableCopy(*MI) is true.
1286 /// \return True, when \p MI has been optimized. In that case, \p MI has
1287 /// been removed from its parent.
1288 /// All COPY instructions created, are inserted in \p LocalMIs.
1289 bool PeepholeOptimizer::optimizeUncoalescableCopy(
1291  assert(isUncoalescableCopy(MI) && "Invalid argument");
1292  UncoalescableRewriter CpyRewriter(MI);
1293 
1294  // Rewrite each rewritable source by generating new COPYs. This works
1295  // differently from optimizeCoalescableCopy since it first makes sure that all
1296  // definitions can be rewritten.
1297  RewriteMapTy RewriteMap;
1298  RegSubRegPair Src;
1300  SmallVector<RegSubRegPair, 4> RewritePairs;
1301  while (CpyRewriter.getNextRewritableSource(Src, Def)) {
1302  // If a physical register is here, this is probably for a good reason.
1303  // Do not rewrite that.
1305  return false;
1306 
1307  // If we do not know how to rewrite this definition, there is no point
1308  // in trying to kill this instruction.
1309  if (!findNextSource(Def, RewriteMap))
1310  return false;
1311 
1312  RewritePairs.push_back(Def);
1313  }
1314 
1315  // The change is possible for all defs, do it.
1316  for (const RegSubRegPair &Def : RewritePairs) {
1317  // Rewrite the "copy" in a way the register coalescer understands.
1318  MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap);
1319  LocalMIs.insert(&NewCopy);
1320  }
1321 
1322  // MI is now dead.
1323  LLVM_DEBUG(dbgs() << "Deleting uncoalescable copy: " << MI);
1324  MI.eraseFromParent();
1325  ++NumUncoalescableCopies;
1326  return true;
1327 }
1328 
1329 /// Check whether MI is a candidate for folding into a later instruction.
1330 /// We only fold loads to virtual registers and the virtual register defined
1331 /// has a single user.
1332 bool PeepholeOptimizer::isLoadFoldable(
1333  MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
1334  if (!MI.canFoldAsLoad() || !MI.mayLoad())
1335  return false;
1336  const MCInstrDesc &MCID = MI.getDesc();
1337  if (MCID.getNumDefs() != 1)
1338  return false;
1339 
1340  Register Reg = MI.getOperand(0).getReg();
1341  // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
1342  // loads. It should be checked when processing uses of the load, since
1343  // uses can be removed during peephole.
1344  if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() &&
1345  MRI->hasOneNonDBGUser(Reg)) {
1346  FoldAsLoadDefCandidates.insert(Reg);
1347  return true;
1348  }
1349  return false;
1350 }
1351 
1352 bool PeepholeOptimizer::isMoveImmediate(
1353  MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
1355  const MCInstrDesc &MCID = MI.getDesc();
1356  if (!MI.isMoveImmediate())
1357  return false;
1358  if (MCID.getNumDefs() != 1)
1359  return false;
1360  Register Reg = MI.getOperand(0).getReg();
1361  if (Reg.isVirtual()) {
1362  ImmDefMIs.insert(std::make_pair(Reg, &MI));
1363  ImmDefRegs.insert(Reg);
1364  return true;
1365  }
1366 
1367  return false;
1368 }
1369 
1370 /// Try folding register operands that are defined by move immediate
1371 /// instructions, i.e. a trivial constant folding optimization, if
1372 /// and only if the def and use are in the same BB.
1373 bool PeepholeOptimizer::foldImmediate(
1374  MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
1376  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1377  MachineOperand &MO = MI.getOperand(i);
1378  if (!MO.isReg() || MO.isDef())
1379  continue;
1380  Register Reg = MO.getReg();
1381  if (!Reg.isVirtual())
1382  continue;
1383  if (ImmDefRegs.count(Reg) == 0)
1384  continue;
1386  assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
1387  if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) {
1388  ++NumImmFold;
1389  return true;
1390  }
1391  }
1392  return false;
1393 }
1394 
1395 // FIXME: This is very simple and misses some cases which should be handled when
1396 // motivating examples are found.
1397 //
1398 // The copy rewriting logic should look at uses as well as defs and be able to
1399 // eliminate copies across blocks.
1400 //
1401 // Later copies that are subregister extracts will also not be eliminated since
1402 // only the first copy is considered.
1403 //
1404 // e.g.
1405 // %1 = COPY %0
1406 // %2 = COPY %0:sub1
1407 //
1408 // Should replace %2 uses with %1:sub1
1409 bool PeepholeOptimizer::foldRedundantCopy(
1411  assert(MI.isCopy() && "expected a COPY machine instruction");
1412 
1413  Register SrcReg = MI.getOperand(1).getReg();
1414  unsigned SrcSubReg = MI.getOperand(1).getSubReg();
1415  if (!SrcReg.isVirtual())
1416  return false;
1417 
1418  Register DstReg = MI.getOperand(0).getReg();
1419  if (!DstReg.isVirtual())
1420  return false;
1421 
1422  RegSubRegPair SrcPair(SrcReg, SrcSubReg);
1423 
1424  if (CopyMIs.insert(std::make_pair(SrcPair, &MI)).second) {
1425  // First copy of this reg seen.
1426  return false;
1427  }
1428 
1429  MachineInstr *PrevCopy = CopyMIs.find(SrcPair)->second;
1430 
1431  assert(SrcSubReg == PrevCopy->getOperand(1).getSubReg() &&
1432  "Unexpected mismatching subreg!");
1433 
1434  Register PrevDstReg = PrevCopy->getOperand(0).getReg();
1435 
1436  // Only replace if the copy register class is the same.
1437  //
1438  // TODO: If we have multiple copies to different register classes, we may want
1439  // to track multiple copies of the same source register.
1440  if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
1441  return false;
1442 
1443  MRI->replaceRegWith(DstReg, PrevDstReg);
1444 
1445  // Lifetime of the previous copy has been extended.
1446  MRI->clearKillFlags(PrevDstReg);
1447  return true;
1448 }
1449 
1450 bool PeepholeOptimizer::isNAPhysCopy(Register Reg) {
1451  return Reg.isPhysical() && !MRI->isAllocatable(Reg);
1452 }
1453 
1454 bool PeepholeOptimizer::foldRedundantNAPhysCopy(
1455  MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) {
1456  assert(MI.isCopy() && "expected a COPY machine instruction");
1457 
1459  return false;
1460 
1461  Register DstReg = MI.getOperand(0).getReg();
1462  Register SrcReg = MI.getOperand(1).getReg();
1463  if (isNAPhysCopy(SrcReg) && Register::isVirtualRegister(DstReg)) {
1464  // %vreg = COPY $physreg
1465  // Avoid using a datastructure which can track multiple live non-allocatable
1466  // phys->virt copies since LLVM doesn't seem to do this.
1467  NAPhysToVirtMIs.insert({SrcReg, &MI});
1468  return false;
1469  }
1470 
1471  if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg)))
1472  return false;
1473 
1474  // $physreg = COPY %vreg
1475  auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
1476  if (PrevCopy == NAPhysToVirtMIs.end()) {
1477  // We can't remove the copy: there was an intervening clobber of the
1478  // non-allocatable physical register after the copy to virtual.
1479  LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing "
1480  << MI);
1481  return false;
1482  }
1483 
1484  Register PrevDstReg = PrevCopy->second->getOperand(0).getReg();
1485  if (PrevDstReg == SrcReg) {
1486  // Remove the virt->phys copy: we saw the virtual register definition, and
1487  // the non-allocatable physical register's state hasn't changed since then.
1488  LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);
1489  ++NumNAPhysCopies;
1490  return true;
1491  }
1492 
1493  // Potential missed optimization opportunity: we saw a different virtual
1494  // register get a copy of the non-allocatable physical register, and we only
1495  // track one such copy. Avoid getting confused by this new non-allocatable
1496  // physical register definition, and remove it from the tracked copies.
1497  LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
1498  NAPhysToVirtMIs.erase(PrevCopy);
1499  return false;
1500 }
1501 
1502 /// \bried Returns true if \p MO is a virtual register operand.
1504  return MO.isReg() && MO.getReg().isVirtual();
1505 }
1506 
1507 bool PeepholeOptimizer::findTargetRecurrence(
1508  Register Reg, const SmallSet<Register, 2> &TargetRegs,
1509  RecurrenceCycle &RC) {
1510  // Recurrence found if Reg is in TargetRegs.
1511  if (TargetRegs.count(Reg))
1512  return true;
1513 
1514  // TODO: Curerntly, we only allow the last instruction of the recurrence
1515  // cycle (the instruction that feeds the PHI instruction) to have more than
1516  // one uses to guarantee that commuting operands does not tie registers
1517  // with overlapping live range. Once we have actual live range info of
1518  // each register, this constraint can be relaxed.
1519  if (!MRI->hasOneNonDBGUse(Reg))
1520  return false;
1521 
1522  // Give up if the reccurrence chain length is longer than the limit.
1523  if (RC.size() >= MaxRecurrenceChain)
1524  return false;
1525 
1527  unsigned Idx = MI.findRegisterUseOperandIdx(Reg);
1528 
1529  // Only interested in recurrences whose instructions have only one def, which
1530  // is a virtual register.
1531  if (MI.getDesc().getNumDefs() != 1)
1532  return false;
1533 
1534  MachineOperand &DefOp = MI.getOperand(0);
1535  if (!isVirtualRegisterOperand(DefOp))
1536  return false;
1537 
1538  // Check if def operand of MI is tied to any use operand. We are only
1539  // interested in the case that all the instructions in the recurrence chain
1540  // have there def operand tied with one of the use operand.
1541  unsigned TiedUseIdx;
1542  if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
1543  return false;
1544 
1545  if (Idx == TiedUseIdx) {
1546  RC.push_back(RecurrenceInstr(&MI));
1547  return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
1548  } else {
1549  // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
1550  unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
1551  if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
1552  RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
1553  return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
1554  }
1555  }
1556 
1557  return false;
1558 }
1559 
1560 /// Phi instructions will eventually be lowered to copy instructions.
1561 /// If phi is in a loop header, a recurrence may formulated around the source
1562 /// and destination of the phi. For such case commuting operands of the
1563 /// instructions in the recurrence may enable coalescing of the copy instruction
1564 /// generated from the phi. For example, if there is a recurrence of
1565 ///
1566 /// LoopHeader:
1567 /// %1 = phi(%0, %100)
1568 /// LoopLatch:
1569 /// %0<def, tied1> = ADD %2<def, tied0>, %1
1570 ///
1571 /// , the fact that %0 and %2 are in the same tied operands set makes
1572 /// the coalescing of copy instruction generated from the phi in
1573 /// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and
1574 /// %2 have overlapping live range. This introduces additional move
1575 /// instruction to the final assembly. However, if we commute %2 and
1576 /// %1 of ADD instruction, the redundant move instruction can be
1577 /// avoided.
1578 bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
1579  SmallSet<Register, 2> TargetRegs;
1580  for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
1581  MachineOperand &MO = PHI.getOperand(Idx);
1582  assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
1583  TargetRegs.insert(MO.getReg());
1584  }
1585 
1586  bool Changed = false;
1587  RecurrenceCycle RC;
1588  if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
1589  // Commutes operands of instructions in RC if necessary so that the copy to
1590  // be generated from PHI can be coalesced.
1591  LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
1592  for (auto &RI : RC) {
1593  LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
1594  auto CP = RI.getCommutePair();
1595  if (CP) {
1596  Changed = true;
1597  TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
1598  (*CP).second);
1599  LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
1600  }
1601  }
1602  }
1603 
1604  return Changed;
1605 }
1606 
1607 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
1608  if (skipFunction(MF.getFunction()))
1609  return false;
1610 
1611  LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
1612  LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
1613 
1614  if (DisablePeephole)
1615  return false;
1616 
1617  TII = MF.getSubtarget().getInstrInfo();
1618  TRI = MF.getSubtarget().getRegisterInfo();
1619  MRI = &MF.getRegInfo();
1620  DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
1621  MLI = &getAnalysis<MachineLoopInfo>();
1622 
1623  bool Changed = false;
1624 
1625  for (MachineBasicBlock &MBB : MF) {
1626  bool SeenMoveImm = false;
1627 
1628  // During this forward scan, at some point it needs to answer the question
1629  // "given a pointer to an MI in the current BB, is it located before or
1630  // after the current instruction".
1631  // To perform this, the following set keeps track of the MIs already seen
1632  // during the scan, if a MI is not in the set, it is assumed to be located
1633  // after. Newly created MIs have to be inserted in the set as well.
1635  SmallSet<Register, 4> ImmDefRegs;
1637  SmallSet<Register, 16> FoldAsLoadDefCandidates;
1638 
1639  // Track when a non-allocatable physical register is copied to a virtual
1640  // register so that useless moves can be removed.
1641  //
1642  // $physreg is the map index; MI is the last valid `%vreg = COPY $physreg`
1643  // without any intervening re-definition of $physreg.
1644  DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;
1645 
1646  // Set of pairs of virtual registers and their subregs that are copied
1647  // from.
1649 
1650  bool IsLoopHeader = MLI->isLoopHeader(&MBB);
1651 
1652  for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
1653  MII != MIE; ) {
1654  MachineInstr *MI = &*MII;
1655  // We may be erasing MI below, increment MII now.
1656  ++MII;
1657  LocalMIs.insert(MI);
1658 
1659  // Skip debug instructions. They should not affect this peephole
1660  // optimization.
1661  if (MI->isDebugInstr())
1662  continue;
1663 
1664  if (MI->isPosition())
1665  continue;
1666 
1667  if (IsLoopHeader && MI->isPHI()) {
1668  if (optimizeRecurrence(*MI)) {
1669  Changed = true;
1670  continue;
1671  }
1672  }
1673 
1674  if (!MI->isCopy()) {
1675  for (const MachineOperand &MO : MI->operands()) {
1676  // Visit all operands: definitions can be implicit or explicit.
1677  if (MO.isReg()) {
1678  Register Reg = MO.getReg();
1679  if (MO.isDef() && isNAPhysCopy(Reg)) {
1680  const auto &Def = NAPhysToVirtMIs.find(Reg);
1681  if (Def != NAPhysToVirtMIs.end()) {
1682  // A new definition of the non-allocatable physical register
1683  // invalidates previous copies.
1684  LLVM_DEBUG(dbgs()
1685  << "NAPhysCopy: invalidating because of " << *MI);
1686  NAPhysToVirtMIs.erase(Def);
1687  }
1688  }
1689  } else if (MO.isRegMask()) {
1690  const uint32_t *RegMask = MO.getRegMask();
1691  for (auto &RegMI : NAPhysToVirtMIs) {
1692  Register Def = RegMI.first;
1693  if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
1694  LLVM_DEBUG(dbgs()
1695  << "NAPhysCopy: invalidating because of " << *MI);
1696  NAPhysToVirtMIs.erase(Def);
1697  }
1698  }
1699  }
1700  }
1701  }
1702 
1703  if (MI->isImplicitDef() || MI->isKill())
1704  continue;
1705 
1706  if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
1707  // Blow away all non-allocatable physical registers knowledge since we
1708  // don't know what's correct anymore.
1709  //
1710  // FIXME: handle explicit asm clobbers.
1711  LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
1712  << *MI);
1713  NAPhysToVirtMIs.clear();
1714  }
1715 
1716  if ((isUncoalescableCopy(*MI) &&
1717  optimizeUncoalescableCopy(*MI, LocalMIs)) ||
1718  (MI->isCompare() && optimizeCmpInstr(*MI)) ||
1719  (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) {
1720  // MI is deleted.
1721  LocalMIs.erase(MI);
1722  Changed = true;
1723  continue;
1724  }
1725 
1726  if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
1727  Changed = true;
1728  continue;
1729  }
1730 
1731  if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) {
1732  // MI is just rewritten.
1733  Changed = true;
1734  continue;
1735  }
1736 
1737  if (MI->isCopy() && (foldRedundantCopy(*MI, CopySrcMIs) ||
1738  foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
1739  LocalMIs.erase(MI);
1740  LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
1741  MI->eraseFromParent();
1742  Changed = true;
1743  continue;
1744  }
1745 
1746  if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) {
1747  SeenMoveImm = true;
1748  } else {
1749  Changed |= optimizeExtInstr(*MI, MBB, LocalMIs);
1750  // optimizeExtInstr might have created new instructions after MI
1751  // and before the already incremented MII. Adjust MII so that the
1752  // next iteration sees the new instructions.
1753  MII = MI;
1754  ++MII;
1755  if (SeenMoveImm)
1756  Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs);
1757  }
1758 
1759  // Check whether MI is a load candidate for folding into a later
1760  // instruction. If MI is not a candidate, check whether we can fold an
1761  // earlier load into MI.
1762  if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) &&
1763  !FoldAsLoadDefCandidates.empty()) {
1764 
1765  // We visit each operand even after successfully folding a previous
1766  // one. This allows us to fold multiple loads into a single
1767  // instruction. We do assume that optimizeLoadInstr doesn't insert
1768  // foldable uses earlier in the argument list. Since we don't restart
1769  // iteration, we'd miss such cases.
1770  const MCInstrDesc &MIDesc = MI->getDesc();
1771  for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands();
1772  ++i) {
1773  const MachineOperand &MOp = MI->getOperand(i);
1774  if (!MOp.isReg())
1775  continue;
1776  Register FoldAsLoadDefReg = MOp.getReg();
1777  if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
1778  // We need to fold load after optimizeCmpInstr, since
1779  // optimizeCmpInstr can enable folding by converting SUB to CMP.
1780  // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
1781  // we need it for markUsesInDebugValueAsUndef().
1782  Register FoldedReg = FoldAsLoadDefReg;
1783  MachineInstr *DefMI = nullptr;
1784  if (MachineInstr *FoldMI =
1785  TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
1786  // Update LocalMIs since we replaced MI with FoldMI and deleted
1787  // DefMI.
1788  LLVM_DEBUG(dbgs() << "Replacing: " << *MI);
1789  LLVM_DEBUG(dbgs() << " With: " << *FoldMI);
1790  LocalMIs.erase(MI);
1791  LocalMIs.erase(DefMI);
1792  LocalMIs.insert(FoldMI);
1793  // Update the call site info.
1794  if (MI->shouldUpdateCallSiteInfo())
1795  MI->getMF()->moveCallSiteInfo(MI, FoldMI);
1796  MI->eraseFromParent();
1798  MRI->markUsesInDebugValueAsUndef(FoldedReg);
1799  FoldAsLoadDefCandidates.erase(FoldedReg);
1800  ++NumLoadFold;
1801 
1802  // MI is replaced with FoldMI so we can continue trying to fold
1803  Changed = true;
1804  MI = FoldMI;
1805  }
1806  }
1807  }
1808  }
1809 
1810  // If we run into an instruction we can't fold across, discard
1811  // the load candidates. Note: We might be able to fold *into* this
1812  // instruction, so this needs to be after the folding logic.
1813  if (MI->isLoadFoldBarrier()) {
1814  LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
1815  FoldAsLoadDefCandidates.clear();
1816  }
1817  }
1818  }
1819 
1820  return Changed;
1821 }
1822 
1823 ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
1824  assert(Def->isCopy() && "Invalid definition");
1825  // Copy instruction are supposed to be: Def = Src.
1826  // If someone breaks this assumption, bad things will happen everywhere.
1827  // There may be implicit uses preventing the copy to be moved across
1828  // some target specific register definitions
1829  assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
1830  "Invalid number of operands");
1831  assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
1832 
1833  if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
1834  // If we look for a different subreg, it means we want a subreg of src.
1835  // Bails as we do not support composing subregs yet.
1836  return ValueTrackerResult();
1837  // Otherwise, we want the whole source.
1838  const MachineOperand &Src = Def->getOperand(1);
1839  if (Src.isUndef())
1840  return ValueTrackerResult();
1841  return ValueTrackerResult(Src.getReg(), Src.getSubReg());
1842 }
1843 
1844 ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
1845  assert(Def->isBitcast() && "Invalid definition");
1846 
1847  // Bail if there are effects that a plain copy will not expose.
1848  if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
1849  return ValueTrackerResult();
1850 
1851  // Bitcasts with more than one def are not supported.
1852  if (Def->getDesc().getNumDefs() != 1)
1853  return ValueTrackerResult();
1854  const MachineOperand DefOp = Def->getOperand(DefIdx);
1855  if (DefOp.getSubReg() != DefSubReg)
1856  // If we look for a different subreg, it means we want a subreg of the src.
1857  // Bails as we do not support composing subregs yet.
1858  return ValueTrackerResult();
1859 
1860  unsigned SrcIdx = Def->getNumOperands();
1861  for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
1862  ++OpIdx) {
1863  const MachineOperand &MO = Def->getOperand(OpIdx);
1864  if (!MO.isReg() || !MO.getReg())
1865  continue;
1866  // Ignore dead implicit defs.
1867  if (MO.isImplicit() && MO.isDead())
1868  continue;
1869  assert(!MO.isDef() && "We should have skipped all the definitions by now");
1870  if (SrcIdx != EndOpIdx)
1871  // Multiple sources?
1872  return ValueTrackerResult();
1873  SrcIdx = OpIdx;
1874  }
1875 
1876  // In some rare case, Def has no input, SrcIdx is out of bound,
1877  // getOperand(SrcIdx) will fail below.
1878  if (SrcIdx >= Def->getNumOperands())
1879  return ValueTrackerResult();
1880 
1881  // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY
1882  // will break the assumed guarantees for the upper bits.
1883  for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
1884  if (UseMI.isSubregToReg())
1885  return ValueTrackerResult();
1886  }
1887 
1888  const MachineOperand &Src = Def->getOperand(SrcIdx);
1889  if (Src.isUndef())
1890  return ValueTrackerResult();
1891  return ValueTrackerResult(Src.getReg(), Src.getSubReg());
1892 }
1893 
1894 ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
1895  assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
1896  "Invalid definition");
1897 
1898  if (Def->getOperand(DefIdx).getSubReg())
1899  // If we are composing subregs, bail out.
1900  // The case we are checking is Def.<subreg> = REG_SEQUENCE.
1901  // This should almost never happen as the SSA property is tracked at
1902  // the register level (as opposed to the subreg level).
1903  // I.e.,
1904  // Def.sub0 =
1905  // Def.sub1 =
1906  // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
1907  // Def. Thus, it must not be generated.
1908  // However, some code could theoretically generates a single
1909  // Def.sub0 (i.e, not defining the other subregs) and we would
1910  // have this case.
1911  // If we can ascertain (or force) that this never happens, we could
1912  // turn that into an assertion.
1913  return ValueTrackerResult();
1914 
1915  if (!TII)
1916  // We could handle the REG_SEQUENCE here, but we do not want to
1917  // duplicate the code from the generic TII.
1918  return ValueTrackerResult();
1919 
1920  SmallVector<RegSubRegPairAndIdx, 8> RegSeqInputRegs;
1921  if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
1922  return ValueTrackerResult();
1923 
1924  // We are looking at:
1925  // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
1926  // Check if one of the operand defines the subreg we are interested in.
1927  for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
1928  if (RegSeqInput.SubIdx == DefSubReg)
1929  return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
1930  }
1931 
1932  // If the subreg we are tracking is super-defined by another subreg,
1933  // we could follow this value. However, this would require to compose
1934  // the subreg and we do not do that for now.
1935  return ValueTrackerResult();
1936 }
1937 
1938 ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
1939  assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
1940  "Invalid definition");
1941 
1942  if (Def->getOperand(DefIdx).getSubReg())
1943  // If we are composing subreg, bail out.
1944  // Same remark as getNextSourceFromRegSequence.
1945  // I.e., this may be turned into an assert.
1946  return ValueTrackerResult();
1947 
1948  if (!TII)
1949  // We could handle the REG_SEQUENCE here, but we do not want to
1950  // duplicate the code from the generic TII.
1951  return ValueTrackerResult();
1952 
1953  RegSubRegPair BaseReg;
1954  RegSubRegPairAndIdx InsertedReg;
1955  if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
1956  return ValueTrackerResult();
1957 
1958  // We are looking at:
1959  // Def = INSERT_SUBREG v0, v1, sub1
1960  // There are two cases:
1961  // 1. DefSubReg == sub1, get v1.
1962  // 2. DefSubReg != sub1, the value may be available through v0.
1963 
1964  // #1 Check if the inserted register matches the required sub index.
1965  if (InsertedReg.SubIdx == DefSubReg) {
1966  return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
1967  }
1968  // #2 Otherwise, if the sub register we are looking for is not partial
1969  // defined by the inserted element, we can look through the main
1970  // register (v0).
1971  const MachineOperand &MODef = Def->getOperand(DefIdx);
1972  // If the result register (Def) and the base register (v0) do not
1973  // have the same register class or if we have to compose
1974  // subregisters, bail out.
1975  if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
1976  BaseReg.SubReg)
1977  return ValueTrackerResult();
1978 
1979  // Get the TRI and check if the inserted sub-register overlaps with the
1980  // sub-register we are tracking.
1982  if (!TRI ||
1983  !(TRI->getSubRegIndexLaneMask(DefSubReg) &
1984  TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)).none())
1985  return ValueTrackerResult();
1986  // At this point, the value is available in v0 via the same subreg
1987  // we used for Def.
1988  return ValueTrackerResult(BaseReg.Reg, DefSubReg);
1989 }
1990 
1991 ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
1992  assert((Def->isExtractSubreg() ||
1993  Def->isExtractSubregLike()) && "Invalid definition");
1994  // We are looking at:
1995  // Def = EXTRACT_SUBREG v0, sub0
1996 
1997  // Bail if we have to compose sub registers.
1998  // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
1999  if (DefSubReg)
2000  return ValueTrackerResult();
2001 
2002  if (!TII)
2003  // We could handle the EXTRACT_SUBREG here, but we do not want to
2004  // duplicate the code from the generic TII.
2005  return ValueTrackerResult();
2006 
2007  RegSubRegPairAndIdx ExtractSubregInputReg;
2008  if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
2009  return ValueTrackerResult();
2010 
2011  // Bail if we have to compose sub registers.
2012  // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
2013  if (ExtractSubregInputReg.SubReg)
2014  return ValueTrackerResult();
2015  // Otherwise, the value is available in the v0.sub0.
2016  return ValueTrackerResult(ExtractSubregInputReg.Reg,
2017  ExtractSubregInputReg.SubIdx);
2018 }
2019 
2020 ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
2021  assert(Def->isSubregToReg() && "Invalid definition");
2022  // We are looking at:
2023  // Def = SUBREG_TO_REG Imm, v0, sub0
2024 
2025  // Bail if we have to compose sub registers.
2026  // If DefSubReg != sub0, we would have to check that all the bits
2027  // we track are included in sub0 and if yes, we would have to
2028  // determine the right subreg in v0.
2029  if (DefSubReg != Def->getOperand(3).getImm())
2030  return ValueTrackerResult();
2031  // Bail if we have to compose sub registers.
2032  // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
2033  if (Def->getOperand(2).getSubReg())
2034  return ValueTrackerResult();
2035 
2036  return ValueTrackerResult(Def->getOperand(2).getReg(),
2037  Def->getOperand(3).getImm());
2038 }
2039 
2040 /// Explore each PHI incoming operand and return its sources.
2041 ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
2042  assert(Def->isPHI() && "Invalid definition");
2043  ValueTrackerResult Res;
2044 
2045  // If we look for a different subreg, bail as we do not support composing
2046  // subregs yet.
2047  if (Def->getOperand(0).getSubReg() != DefSubReg)
2048  return ValueTrackerResult();
2049 
2050  // Return all register sources for PHI instructions.
2051  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
2052  const MachineOperand &MO = Def->getOperand(i);
2053  assert(MO.isReg() && "Invalid PHI instruction");
2054  // We have no code to deal with undef operands. They shouldn't happen in
2055  // normal programs anyway.
2056  if (MO.isUndef())
2057  return ValueTrackerResult();
2058  Res.addSource(MO.getReg(), MO.getSubReg());
2059  }
2060 
2061  return Res;
2062 }
2063 
2064 ValueTrackerResult ValueTracker::getNextSourceImpl() {
2065  assert(Def && "This method needs a valid definition");
2066 
2067  assert(((Def->getOperand(DefIdx).isDef() &&
2068  (DefIdx < Def->getDesc().getNumDefs() ||
2069  Def->getDesc().isVariadic())) ||
2070  Def->getOperand(DefIdx).isImplicit()) &&
2071  "Invalid DefIdx");
2072  if (Def->isCopy())
2073  return getNextSourceFromCopy();
2074  if (Def->isBitcast())
2075  return getNextSourceFromBitcast();
2076  // All the remaining cases involve "complex" instructions.
2077  // Bail if we did not ask for the advanced tracking.
2078  if (DisableAdvCopyOpt)
2079  return ValueTrackerResult();
2080  if (Def->isRegSequence() || Def->isRegSequenceLike())
2081  return getNextSourceFromRegSequence();
2082  if (Def->isInsertSubreg() || Def->isInsertSubregLike())
2083  return getNextSourceFromInsertSubreg();
2084  if (Def->isExtractSubreg() || Def->isExtractSubregLike())
2085  return getNextSourceFromExtractSubreg();
2086  if (Def->isSubregToReg())
2087  return getNextSourceFromSubregToReg();
2088  if (Def->isPHI())
2089  return getNextSourceFromPHI();
2090  return ValueTrackerResult();
2091 }
2092 
2093 ValueTrackerResult ValueTracker::getNextSource() {
2094  // If we reach a point where we cannot move up in the use-def chain,
2095  // there is nothing we can get.
2096  if (!Def)
2097  return ValueTrackerResult();
2098 
2099  ValueTrackerResult Res = getNextSourceImpl();
2100  if (Res.isValid()) {
2101  // Update definition, definition index, and subregister for the
2102  // next call of getNextSource.
2103  // Update the current register.
2104  bool OneRegSrc = Res.getNumSources() == 1;
2105  if (OneRegSrc)
2106  Reg = Res.getSrcReg(0);
2107  // Update the result before moving up in the use-def chain
2108  // with the instruction containing the last found sources.
2109  Res.setInst(Def);
2110 
2111  // If we can still move up in the use-def chain, move to the next
2112  // definition.
2113  if (!Register::isPhysicalRegister(Reg) && OneRegSrc) {
2115  if (DI != MRI.def_end()) {
2116  Def = DI->getParent();
2117  DefIdx = DI.getOperandNo();
2118  DefSubReg = Res.getSrcSubReg(0);
2119  } else {
2120  Def = nullptr;
2121  }
2122  return Res;
2123  }
2124  }
2125  // If we end up here, this means we will not be able to find another source
2126  // for the next iteration. Make sure any new call to getNextSource bails out
2127  // early by cutting the use-def chain.
2128  Def = nullptr;
2129  return Res;
2130 }
i
i
Definition: README.txt:29
llvm::TargetRegisterInfo::getSubClassWithSubReg
virtual const TargetRegisterClass * getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const
Returns the largest legal sub-class of RC that supports the sub-register index Idx.
Definition: TargetRegisterInfo.h:613
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:243
llvm::MachineRegisterInfo::markUsesInDebugValueAsUndef
void markUsesInDebugValueAsUndef(Register Reg) const
markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the specified register as undefined wh...
Definition: MachineRegisterInfo.cpp:532
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
MachineInstr.h
llvm::MachineRegisterInfo::def_begin
def_iterator def_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:384
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::HexagonMCInstrInfo::getDesc
const MCInstrDesc & getDesc(MCInstrInfo const &MCII, MCInst const &MCI)
Definition: HexagonMCInstrInfo.cpp:248
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:102
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
Optional.h
llvm::HexagonInstrInfo::analyzeCompare
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
Definition: HexagonInstrInfo.cpp:1793
llvm::PeepholeOptimizerID
char & PeepholeOptimizerID
PeepholeOptimizer - This pass performs peephole optimizations - like extension and comparison elimina...
Definition: PeepholeOptimizer.cpp:443
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
optimizeSelect
we should consider alternate ways to model stack dependencies Lots of things could be done in WebAssemblyTargetTransformInfo cpp there are numerous optimization related hooks that can be overridden in WebAssemblyTargetLowering Instead of the OptimizeReturned which should consider preserving the returned attribute through to MachineInstrs and extending the MemIntrinsicResults pass to do this optimization on calls too That would also let the WebAssemblyPeephole pass clean up dead defs for such as it does for stores Consider implementing optimizeSelect
Definition: README.txt:81
MCInstrDesc.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::DenseMapBase::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
Pass.h
llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:92
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::TargetInstrInfo::RegSubRegPairAndIdx
A pair composed of a pair of a register and a sub-register index, and another sub-register index.
Definition: TargetInstrInfo.h:495
ErrorHandling.h
llvm::MachineRegisterInfo::getTargetRegisterInfo
const TargetRegisterInfo * getTargetRegisterInfo() const
Definition: MachineRegisterInfo.h:153
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
DisableNAPhysCopyOpt
static cl::opt< bool > DisableNAPhysCopyOpt("disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable non-allocatable physical register copy optimization"))
llvm::MachineRegisterInfo::use_instr_nodbg_begin
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:535
MachineBasicBlock.h
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
llvm::MachineRegisterInfo::use_nodbg_instructions
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:543
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
DenseMap.h
TargetInstrInfo.h
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::MachineFunctionProperties::Property::IsSSA
@ IsSSA
llvm::MachineFunctionProperties
Properties which a MachineFunction may have at a given point in time.
Definition: MachineFunction.h:111
llvm::Optional
Definition: APInt.h:33
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::MachineDominatorTree::dominates
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
Definition: MachineDominators.h:109
Aggressive
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::MachineRegisterInfo::defusechain_iterator::getOperandNo
unsigned getOperandNo() const
getOperandNo - Return the operand # of this MachineOperand in its MachineInstr.
Definition: MachineRegisterInfo.h:1071
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
MaxRecurrenceChain
static cl::opt< unsigned > MaxRecurrenceChain("recurrence-chain-limit", cl::Hidden, cl::init(3), cl::desc("Maximum length of recurrence chain when evaluating the benefit " "of commuting operands"))
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
MachineRegisterInfo.h
llvm::TargetInstrInfo::RegSubRegPair
A pair composed of a register and a sub-register index.
Definition: TargetInstrInfo.h:477
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:588
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
clear
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:233
CommandLine.h
llvm::Register::isPhysical
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:636
llvm::MachineOperand::isImplicit
bool isImplicit() const
Definition: MachineOperand.h:380
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::MachineRegisterInfo::use_nodbg_operands
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
Definition: MachineRegisterInfo.h:526
MachineLoopInfo.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
Rewriter
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
llvm::TargetRegisterInfo::getSubRegIndexLaneMask
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
Definition: TargetRegisterInfo.h:377
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::MachineOperand::getRegMask
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
Definition: MachineOperand.h:630
llvm::MachineOperand::setSubReg
void setSubReg(unsigned subReg)
Definition: MachineOperand.h:471
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
TargetOpcodes.h
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::MachineFunctionProperties::set
MachineFunctionProperties & set(Property P)
Definition: MachineFunction.h:169
insertPHI
static MachineInstr & insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const SmallVectorImpl< RegSubRegPair > &SrcRegs, MachineInstr &OrigPHI)
Insert a PHI instruction with incoming edges SrcRegs that are guaranteed to have the same register cl...
Definition: PeepholeOptimizer.cpp:776
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
SmallPtrSet.h
RewritePHILimit
static cl::opt< unsigned > RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10), cl::desc("Limit the length of PHI chains to lookup"))
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:400
llvm::MachineOperand::getParent
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Definition: MachineOperand.h:238
llvm::MachineLoopInfo::isLoopHeader
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
Definition: MachineLoopInfo.h:142
RegSubRegPairAndIdx
TargetInstrInfo::RegSubRegPairAndIdx RegSubRegPairAndIdx
Definition: PeepholeOptimizer.cpp:102
getNewSource
static RegSubRegPair getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, RegSubRegPair Def, const PeepholeOptimizer::RewriteMapTy &RewriteMap, bool HandleMultipleSources=true)
Given a Def.Reg and Def.SubReg pair, use RewriteMap to find the new source to use for rewrite.
Definition: PeepholeOptimizer.cpp:1132
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::MachineRegisterInfo::clearKillFlags
void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
Definition: MachineRegisterInfo.cpp:431
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:626
llvm::cl::opt< bool >
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:418
llvm::MachineOperand::clobbersPhysReg
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
Definition: MachineOperand.h:617
llvm::MachineOperand::isUndef
bool isUndef() const
Definition: MachineOperand.h:395
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
isVirtualRegisterOperand
static bool isVirtualRegisterOperand(MachineOperand &MO)
\bried Returns true if MO is a virtual register operand.
Definition: PeepholeOptimizer.cpp:1503
llvm::TargetInstrInfo::RegSubRegPair::Reg
Register Reg
Definition: TargetInstrInfo.h:478
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::MachineOperand::isDead
bool isDead() const
Definition: MachineOperand.h:385
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
MachineFunctionPass.h
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineRegisterInfo::isAllocatable
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
Definition: MachineRegisterInfo.h:933
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:541
RegSubRegPair
TargetInstrInfo::RegSubRegPair RegSubRegPair
Definition: PeepholeOptimizer.cpp:101
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::operator==
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1974
getCopyRewriter
static Rewriter * getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
Get the appropriated Rewriter for MI.
Definition: PeepholeOptimizer.cpp:1105
llvm::MachineOperand::isRegMask
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
Definition: MachineOperand.h:345
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1255
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::MachineRegisterInfo::def_end
static def_iterator def_end()
Definition: MachineRegisterInfo.h:387
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::SmallSet::erase
bool erase(const T &V)
Definition: SmallSet.h:207
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:552
llvm::TargetInstrInfo::RegSubRegPair::SubReg
unsigned SubReg
Definition: TargetInstrInfo.h:479
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:489
llvm::MachineRegisterInfo::hasOneNonDBGUse
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
Definition: MachineRegisterInfo.cpp:419
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::MachineOperand::setIsUndef
void setIsUndef(bool Val=true)
Definition: MachineOperand.h:511
uint32_t
TargetSubtargetInfo.h
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:375
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:365
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:380
llvm::HexagonISD::CP
@ CP
Definition: HexagonISelLowering.h:53
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::TargetRegisterInfo::shouldRewriteCopySrc
virtual bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const
Definition: TargetRegisterInfo.cpp:412
std
Definition: BitVector.h:838
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:592
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::initializePeepholeOptimizerPass
void initializePeepholeOptimizerPass(PassRegistry &)
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::MachineRegisterInfo::hasOneNonDBGUser
bool hasOneNonDBGUser(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug instruction using the specified regis...
Definition: MachineRegisterInfo.cpp:423
optimizeCondBranch
we should consider alternate ways to model stack dependencies Lots of things could be done in WebAssemblyTargetTransformInfo cpp there are numerous optimization related hooks that can be overridden in WebAssemblyTargetLowering Instead of the OptimizeReturned which should consider preserving the returned attribute through to MachineInstrs and extending the MemIntrinsicResults pass to do this optimization on calls too That would also let the WebAssemblyPeephole pass clean up dead defs for such as it does for stores Consider implementing optimizeCondBranch
Definition: README.txt:81
DEBUG_TYPE
#define DEBUG_TYPE
Definition: PeepholeOptimizer.cpp:104
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:216
Optimizations
Peephole Optimizations
Definition: PeepholeOptimizer.cpp:450
DisablePeephole
static cl::opt< bool > DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer"))
SmallVector.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
MachineInstrBuilder.h
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition: AArch64ExpandPseudoInsts.cpp:103
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:492
LaneBitmask.h
llvm::SmallSet::clear
void clear()
Definition: SmallSet.h:218
llvm::MachineRegisterInfo::constrainRegClass
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
Definition: MachineRegisterInfo.cpp:85
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE, "Peephole Optimizations", false, false) INITIALIZE_PASS_END(PeepholeOptimizer
llvm::MachineRegisterInfo::defusechain_iterator
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
Definition: MachineRegisterInfo.h:266
llvm::SmallSet::empty
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:155
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
MachineOperand.h
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
DisableAdvCopyOpt
static cl::opt< bool > DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization"))
Specifiy whether or not the value tracking looks through complex instructions.
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
llvm::TargetInstrInfo::CommuteAnyOperandIndex
static const unsigned CommuteAnyOperandIndex
Definition: TargetInstrInfo.h:424
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:45
MachineFunction.h
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:677
llvm::MachineInstrBundleIterator< MachineInstr >
InitializePasses.h
TargetRegisterInfo.h
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
SubReg
unsigned SubReg
Definition: AArch64AdvSIMDScalarPass.cpp:104
llvm::TargetInstrInfo::RegSubRegPairAndIdx::SubIdx
unsigned SubIdx
Definition: TargetInstrInfo.h:496
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1172
MachineDominators.h
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37