LLVM  13.0.0git
MachineCombiner.cpp
Go to the documentation of this file.
1 //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The machine combiner pass uses machine trace metrics to ensure the combined
10 // instructions do not lengthen the critical path or the resource depth.
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/ADT/Statistic.h"
24 #include "llvm/CodeGen/Passes.h"
30 #include "llvm/InitializePasses.h"
32 #include "llvm/Support/Debug.h"
34 
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "machine-combiner"
38 
39 STATISTIC(NumInstCombined, "Number of machineinst combined");
40 
41 static cl::opt<unsigned>
42 inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
43  cl::desc("Incremental depth computation will be used for basic "
44  "blocks with more instructions."), cl::init(500));
45 
46 static cl::opt<bool> dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden,
47  cl::desc("Dump all substituted intrs"),
48  cl::init(false));
49 
50 #ifdef EXPENSIVE_CHECKS
52  "machine-combiner-verify-pattern-order", cl::Hidden,
53  cl::desc(
54  "Verify that the generated patterns are ordered by increasing latency"),
55  cl::init(true));
56 #else
58  "machine-combiner-verify-pattern-order", cl::Hidden,
59  cl::desc(
60  "Verify that the generated patterns are ordered by increasing latency"),
61  cl::init(false));
62 #endif
63 
64 namespace {
65 class MachineCombiner : public MachineFunctionPass {
66  const TargetSubtargetInfo *STI;
67  const TargetInstrInfo *TII;
68  const TargetRegisterInfo *TRI;
69  MCSchedModel SchedModel;
71  MachineLoopInfo *MLI; // Current MachineLoopInfo
72  MachineTraceMetrics *Traces;
75  ProfileSummaryInfo *PSI;
76  RegisterClassInfo RegClassInfo;
77 
78  TargetSchedModel TSchedModel;
79 
80  /// True if optimizing for code size.
81  bool OptSize;
82 
83 public:
84  static char ID;
85  MachineCombiner() : MachineFunctionPass(ID) {
87  }
88  void getAnalysisUsage(AnalysisUsage &AU) const override;
89  bool runOnMachineFunction(MachineFunction &MF) override;
90  StringRef getPassName() const override { return "Machine InstCombiner"; }
91 
92 private:
93  bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize);
94  bool combineInstructions(MachineBasicBlock *);
95  MachineInstr *getOperandDef(const MachineOperand &MO);
96  unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
97  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
98  MachineTraceMetrics::Trace BlockTrace);
99  unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
100  MachineTraceMetrics::Trace BlockTrace);
101  bool
102  improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
103  MachineTraceMetrics::Trace BlockTrace,
106  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
107  MachineCombinerPattern Pattern, bool SlackIsAccurate);
108  bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB,
112  bool preservesResourceLen(MachineBasicBlock *MBB,
113  MachineTraceMetrics::Trace BlockTrace,
116  void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
118  std::pair<unsigned, unsigned>
119  getLatenciesForInstrSequences(MachineInstr &MI,
122  MachineTraceMetrics::Trace BlockTrace);
123 
124  void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root,
126 };
127 }
128 
129 char MachineCombiner::ID = 0;
131 
132 INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE,
133  "Machine InstCombiner", false, false)
138 
139 void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
140  AU.setPreservesCFG();
141  AU.addPreserved<MachineDominatorTree>();
142  AU.addRequired<MachineLoopInfo>();
143  AU.addPreserved<MachineLoopInfo>();
144  AU.addRequired<MachineTraceMetrics>();
145  AU.addPreserved<MachineTraceMetrics>();
146  AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
147  AU.addRequired<ProfileSummaryInfoWrapperPass>();
149 }
150 
151 MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
152  MachineInstr *DefInstr = nullptr;
153  // We need a virtual register definition.
154  if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
155  DefInstr = MRI->getUniqueVRegDef(MO.getReg());
156  // PHI's have no depth etc.
157  if (DefInstr && DefInstr->isPHI())
158  DefInstr = nullptr;
159  return DefInstr;
160 }
161 
162 /// Computes depth of instructions in vector \InsInstr.
163 ///
164 /// \param InsInstrs is a vector of machine instructions
165 /// \param InstrIdxForVirtReg is a dense map of virtual register to index
166 /// of defining machine instruction in \p InsInstrs
167 /// \param BlockTrace is a trace of machine instructions
168 ///
169 /// \returns Depth of last instruction in \InsInstrs ("NewRoot")
170 unsigned
171 MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
172  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
173  MachineTraceMetrics::Trace BlockTrace) {
174  SmallVector<unsigned, 16> InstrDepth;
175  assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
176  "Missing machine model\n");
177 
178  // For each instruction in the new sequence compute the depth based on the
179  // operands. Use the trace information when possible. For new operands which
180  // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
181  for (auto *InstrPtr : InsInstrs) { // for each Use
182  unsigned IDepth = 0;
183  for (const MachineOperand &MO : InstrPtr->operands()) {
184  // Check for virtual register operand.
185  if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
186  continue;
187  if (!MO.isUse())
188  continue;
189  unsigned DepthOp = 0;
190  unsigned LatencyOp = 0;
192  InstrIdxForVirtReg.find(MO.getReg());
193  if (II != InstrIdxForVirtReg.end()) {
194  // Operand is new virtual register not in trace
195  assert(II->second < InstrDepth.size() && "Bad Index");
196  MachineInstr *DefInstr = InsInstrs[II->second];
197  assert(DefInstr &&
198  "There must be a definition for a new virtual register");
199  DepthOp = InstrDepth[II->second];
200  int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg());
201  int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg());
202  LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
203  InstrPtr, UseIdx);
204  } else {
205  MachineInstr *DefInstr = getOperandDef(MO);
206  if (DefInstr) {
207  DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
208  LatencyOp = TSchedModel.computeOperandLatency(
209  DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
210  InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
211  }
212  }
213  IDepth = std::max(IDepth, DepthOp + LatencyOp);
214  }
215  InstrDepth.push_back(IDepth);
216  }
217  unsigned NewRootIdx = InsInstrs.size() - 1;
218  return InstrDepth[NewRootIdx];
219 }
220 
221 /// Computes instruction latency as max of latency of defined operands.
222 ///
223 /// \param Root is a machine instruction that could be replaced by NewRoot.
224 /// It is used to compute a more accurate latency information for NewRoot in
225 /// case there is a dependent instruction in the same trace (\p BlockTrace)
226 /// \param NewRoot is the instruction for which the latency is computed
227 /// \param BlockTrace is a trace of machine instructions
228 ///
229 /// \returns Latency of \p NewRoot
230 unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
231  MachineTraceMetrics::Trace BlockTrace) {
232  assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
233  "Missing machine model\n");
234 
235  // Check each definition in NewRoot and compute the latency
236  unsigned NewRootLatency = 0;
237 
238  for (const MachineOperand &MO : NewRoot->operands()) {
239  // Check for virtual register operand.
240  if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
241  continue;
242  if (!MO.isDef())
243  continue;
244  // Get the first instruction that uses MO
246  RI++;
247  if (RI == MRI->reg_end())
248  continue;
249  MachineInstr *UseMO = RI->getParent();
250  unsigned LatencyOp = 0;
251  if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
252  LatencyOp = TSchedModel.computeOperandLatency(
253  NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
254  UseMO->findRegisterUseOperandIdx(MO.getReg()));
255  } else {
256  LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
257  }
258  NewRootLatency = std::max(NewRootLatency, LatencyOp);
259  }
260  return NewRootLatency;
261 }
262 
263 /// The combiner's goal may differ based on which pattern it is attempting
264 /// to optimize.
265 enum class CombinerObjective {
266  MustReduceDepth, // The data dependency chain must be improved.
267  MustReduceRegisterPressure, // The register pressure must be reduced.
268  Default // The critical path must not be lengthened.
269 };
270 
272  // TODO: If C++ ever gets a real enum class, make this part of the
273  // MachineCombinerPattern class.
274  switch (P) {
285  default:
287  }
288 }
289 
290 /// Estimate the latency of the new and original instruction sequence by summing
291 /// up the latencies of the inserted and deleted instructions. This assumes
292 /// that the inserted and deleted instructions are dependent instruction chains,
293 /// which might not hold in all cases.
294 std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
297  MachineTraceMetrics::Trace BlockTrace) {
298  assert(!InsInstrs.empty() && "Only support sequences that insert instrs.");
299  unsigned NewRootLatency = 0;
300  // NewRoot is the last instruction in the \p InsInstrs vector.
301  MachineInstr *NewRoot = InsInstrs.back();
302  for (unsigned i = 0; i < InsInstrs.size() - 1; i++)
303  NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
304  NewRootLatency += getLatency(&MI, NewRoot, BlockTrace);
305 
306  unsigned RootLatency = 0;
307  for (auto I : DelInstrs)
308  RootLatency += TSchedModel.computeInstrLatency(I);
309 
310  return {NewRootLatency, RootLatency};
311 }
312 
313 bool MachineCombiner::reduceRegisterPressure(
318  // FIXME: for now, we don't do any check for the register pressure patterns.
319  // We treat them as always profitable. But we can do better if we make
320  // RegPressureTracker class be aware of TIE attribute. Then we can get an
321  // accurate compare of register pressure with DelInstrs or InsInstrs.
322  return true;
323 }
324 
325 /// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
326 /// The new code sequence ends in MI NewRoot. A necessary condition for the new
327 /// sequence to replace the old sequence is that it cannot lengthen the critical
328 /// path. The definition of "improve" may be restricted by specifying that the
329 /// new path improves the data dependency chain (MustReduceDepth).
330 bool MachineCombiner::improvesCriticalPathLen(
332  MachineTraceMetrics::Trace BlockTrace,
335  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
337  bool SlackIsAccurate) {
338  assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
339  "Missing machine model\n");
340  // Get depth and latency of NewRoot and Root.
341  unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
342  unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
343 
344  LLVM_DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: "
345  << NewRootDepth << "\tRootDepth: " << RootDepth);
346 
347  // For a transform such as reassociation, the cost equation is
348  // conservatively calculated so that we must improve the depth (data
349  // dependency cycles) in the critical path to proceed with the transform.
350  // Being conservative also protects against inaccuracies in the underlying
351  // machine trace metrics and CPU models.
353  LLVM_DEBUG(dbgs() << "\tIt MustReduceDepth ");
354  LLVM_DEBUG(NewRootDepth < RootDepth
355  ? dbgs() << "\t and it does it\n"
356  : dbgs() << "\t but it does NOT do it\n");
357  return NewRootDepth < RootDepth;
358  }
359 
360  // A more flexible cost calculation for the critical path includes the slack
361  // of the original code sequence. This may allow the transform to proceed
362  // even if the instruction depths (data dependency cycles) become worse.
363 
364  // Account for the latency of the inserted and deleted instructions by
365  unsigned NewRootLatency, RootLatency;
366  std::tie(NewRootLatency, RootLatency) =
367  getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
368 
369  unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
370  unsigned NewCycleCount = NewRootDepth + NewRootLatency;
371  unsigned OldCycleCount =
372  RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
373  LLVM_DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency
374  << "\tRootLatency: " << RootLatency << "\n\tRootSlack: "
375  << RootSlack << " SlackIsAccurate=" << SlackIsAccurate
376  << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
377  << "\n\tRootDepth + RootLatency + RootSlack = "
378  << OldCycleCount;);
379  LLVM_DEBUG(NewCycleCount <= OldCycleCount
380  ? dbgs() << "\n\t It IMPROVES PathLen because"
381  : dbgs() << "\n\t It DOES NOT improve PathLen because");
382  LLVM_DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount
383  << ", OldCycleCount = " << OldCycleCount << "\n");
384 
385  return NewCycleCount <= OldCycleCount;
386 }
387 
388 /// helper routine to convert instructions into SC
389 void MachineCombiner::instr2instrSC(
392  for (auto *InstrPtr : Instrs) {
393  unsigned Opc = InstrPtr->getOpcode();
394  unsigned Idx = TII->get(Opc).getSchedClass();
395  const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
396  InstrsSC.push_back(SC);
397  }
398 }
399 
400 /// True when the new instructions do not increase resource length
401 bool MachineCombiner::preservesResourceLen(
404  SmallVectorImpl<MachineInstr *> &DelInstrs) {
405  if (!TSchedModel.hasInstrSchedModel())
406  return true;
407 
408  // Compute current resource length
409 
410  //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
412  MBBarr.push_back(MBB);
413  unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
414 
415  // Deal with SC rather than Instructions.
418 
419  instr2instrSC(InsInstrs, InsInstrsSC);
420  instr2instrSC(DelInstrs, DelInstrsSC);
421 
422  ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
423  ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
424 
425  // Compute new resource length.
426  unsigned ResLenAfterCombine =
427  BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
428 
429  LLVM_DEBUG(dbgs() << "\t\tResource length before replacement: "
430  << ResLenBeforeCombine
431  << " and after: " << ResLenAfterCombine << "\n";);
432  LLVM_DEBUG(
433  ResLenAfterCombine <=
434  ResLenBeforeCombine + TII->getExtendResourceLenLimit()
435  ? dbgs() << "\t\t As result it IMPROVES/PRESERVES Resource Length\n"
436  : dbgs() << "\t\t As result it DOES NOT improve/preserve Resource "
437  "Length\n");
438 
439  return ResLenAfterCombine <=
440  ResLenBeforeCombine + TII->getExtendResourceLenLimit();
441 }
442 
443 /// \returns true when new instruction sequence should be generated
444 /// independent if it lengthens critical path or not
445 bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize,
446  bool OptForSize) {
447  if (OptForSize && (NewSize < OldSize))
448  return true;
449  if (!TSchedModel.hasInstrSchedModelOrItineraries())
450  return true;
451  return false;
452 }
453 
454 /// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
455 /// depths if requested.
456 ///
457 /// \param MBB basic block to insert instructions in
458 /// \param MI current machine instruction
459 /// \param InsInstrs new instructions to insert in \p MBB
460 /// \param DelInstrs instruction to delete from \p MBB
461 /// \param MinInstr is a pointer to the machine trace information
462 /// \param RegUnits set of live registers, needed to compute instruction depths
463 /// \param TII is target instruction info, used to call target hook
464 /// \param Pattern is used to call target hook finalizeInsInstrs
465 /// \param IncrementalUpdate if true, compute instruction depths incrementally,
466 /// otherwise invalidate the trace
471  SparseSet<LiveRegUnit> &RegUnits,
472  const TargetInstrInfo *TII,
474  bool IncrementalUpdate) {
475  // If we want to fix up some placeholder for some target, do it now.
476  // We need this because in genAlternativeCodeSequence, we have not decided the
477  // better pattern InsInstrs or DelInstrs, so we don't want generate some
478  // sideeffect to the function. For example we need to delay the constant pool
479  // entry creation here after InsInstrs is selected as better pattern.
480  // Otherwise the constant pool entry created for InsInstrs will not be deleted
481  // even if InsInstrs is not the better pattern.
482  TII->finalizeInsInstrs(MI, Pattern, InsInstrs);
483 
484  for (auto *InstrPtr : InsInstrs)
486 
487  for (auto *InstrPtr : DelInstrs) {
488  InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
489  // Erase all LiveRegs defined by the removed instruction
490  for (auto I = RegUnits.begin(); I != RegUnits.end(); ) {
491  if (I->MI == InstrPtr)
492  I = RegUnits.erase(I);
493  else
494  I++;
495  }
496  }
497 
498  if (IncrementalUpdate)
499  for (auto *InstrPtr : InsInstrs)
500  MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
501  else
502  MinInstr->invalidate(MBB);
503 
504  NumInstCombined++;
505 }
506 
507 // Check that the difference between original and new latency is decreasing for
508 // later patterns. This helps to discover sub-optimal pattern orderings.
509 void MachineCombiner::verifyPatternOrder(
512  long PrevLatencyDiff = std::numeric_limits<long>::max();
513  (void)PrevLatencyDiff; // Variable is used in assert only.
514  for (auto P : Patterns) {
517  DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
518  TII->genAlternativeCodeSequence(Root, P, InsInstrs, DelInstrs,
519  InstrIdxForVirtReg);
520  // Found pattern, but did not generate alternative sequence.
521  // This can happen e.g. when an immediate could not be materialized
522  // in a single instruction.
523  if (InsInstrs.empty() || !TSchedModel.hasInstrSchedModelOrItineraries())
524  continue;
525 
526  unsigned NewRootLatency, RootLatency;
527  std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
528  Root, InsInstrs, DelInstrs, MinInstr->getTrace(MBB));
529  long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
530  assert(CurrentLatencyDiff <= PrevLatencyDiff &&
531  "Current pattern is better than previous pattern.");
532  PrevLatencyDiff = CurrentLatencyDiff;
533  }
534 }
535 
536 /// Substitute a slow code sequence with a faster one by
537 /// evaluating instruction combining pattern.
538 /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
539 /// combining based on machine trace metrics. Only combine a sequence of
540 /// instructions when this neither lengthens the critical path nor increases
541 /// resource pressure. When optimizing for codesize always combine when the new
542 /// sequence is shorter.
543 bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
544  bool Changed = false;
545  LLVM_DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
546 
547  bool IncrementalUpdate = false;
548  auto BlockIter = MBB->begin();
549  decltype(BlockIter) LastUpdate;
550  // Check if the block is in a loop.
551  const MachineLoop *ML = MLI->getLoopFor(MBB);
552  if (!MinInstr)
553  MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
554 
555  SparseSet<LiveRegUnit> RegUnits;
556  RegUnits.setUniverse(TRI->getNumRegUnits());
557 
558  bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
559 
560  bool DoRegPressureReduce =
561  TII->shouldReduceRegisterPressure(MBB, &RegClassInfo);
562 
563  while (BlockIter != MBB->end()) {
564  auto &MI = *BlockIter++;
566  // The motivating example is:
567  //
568  // MUL Other MUL_op1 MUL_op2 Other
569  // \ / \ | /
570  // ADD/SUB => MADD/MSUB
571  // (=Root) (=NewRoot)
572 
573  // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
574  // usually beneficial for code size it unfortunately can hurt performance
575  // when the ADD is on the critical path, but the MUL is not. With the
576  // substitution the MUL becomes part of the critical path (in form of the
577  // MADD) and can lengthen it on architectures where the MADD latency is
578  // longer than the ADD latency.
579  //
580  // For each instruction we check if it can be the root of a combiner
581  // pattern. Then for each pattern the new code sequence in form of MI is
582  // generated and evaluated. When the efficiency criteria (don't lengthen
583  // critical path, don't use more resources) is met the new sequence gets
584  // hooked up into the basic block before the old sequence is removed.
585  //
586  // The algorithm does not try to evaluate all patterns and pick the best.
587  // This is only an artificial restriction though. In practice there is
588  // mostly one pattern, and getMachineCombinerPatterns() can order patterns
589  // based on an internal cost heuristic. If
590  // machine-combiner-verify-pattern-order is enabled, all patterns are
591  // checked to ensure later patterns do not provide better latency savings.
592 
593  if (!TII->getMachineCombinerPatterns(MI, Patterns, DoRegPressureReduce))
594  continue;
595 
596  if (VerifyPatternOrder)
597  verifyPatternOrder(MBB, MI, Patterns);
598 
599  for (auto P : Patterns) {
602  DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
603  TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
604  InstrIdxForVirtReg);
605  unsigned NewInstCount = InsInstrs.size();
606  unsigned OldInstCount = DelInstrs.size();
607  // Found pattern, but did not generate alternative sequence.
608  // This can happen e.g. when an immediate could not be materialized
609  // in a single instruction.
610  if (!NewInstCount)
611  continue;
612 
613  LLVM_DEBUG(if (dump_intrs) {
614  dbgs() << "\tFor the Pattern (" << (int)P
615  << ") these instructions could be removed\n";
616  for (auto const *InstrPtr : DelInstrs)
617  InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
618  /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
619  dbgs() << "\tThese instructions could replace the removed ones\n";
620  for (auto const *InstrPtr : InsInstrs)
621  InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
622  /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
623  });
624 
625  bool SubstituteAlways = false;
626  if (ML && TII->isThroughputPattern(P))
627  SubstituteAlways = true;
628 
629  if (IncrementalUpdate && LastUpdate != BlockIter) {
630  // Update depths since the last incremental update.
631  MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
632  LastUpdate = BlockIter;
633  }
634 
635  if (DoRegPressureReduce &&
638  if (MBB->size() > inc_threshold) {
639  // Use incremental depth updates for basic blocks above threshold
640  IncrementalUpdate = true;
641  LastUpdate = BlockIter;
642  }
643  if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) {
644  // Replace DelInstrs with InsInstrs.
645  insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
646  RegUnits, TII, P, IncrementalUpdate);
647  Changed |= true;
648 
649  // Go back to previous instruction as it may have ILP reassociation
650  // opportunity.
651  BlockIter--;
652  break;
653  }
654  }
655 
656  // Substitute when we optimize for codesize and the new sequence has
657  // fewer instructions OR
658  // the new sequence neither lengthens the critical path nor increases
659  // resource pressure.
660  if (SubstituteAlways ||
661  doSubstitute(NewInstCount, OldInstCount, OptForSize)) {
662  insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
663  RegUnits, TII, P, IncrementalUpdate);
664  // Eagerly stop after the first pattern fires.
665  Changed = true;
666  break;
667  } else {
668  // For big basic blocks, we only compute the full trace the first time
669  // we hit this. We do not invalidate the trace, but instead update the
670  // instruction depths incrementally.
671  // NOTE: Only the instruction depths up to MI are accurate. All other
672  // trace information is not updated.
673  MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
674  Traces->verifyAnalysis();
675  if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
676  InstrIdxForVirtReg, P,
677  !IncrementalUpdate) &&
678  preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
679  if (MBB->size() > inc_threshold) {
680  // Use incremental depth updates for basic blocks above treshold
681  IncrementalUpdate = true;
682  LastUpdate = BlockIter;
683  }
684 
685  insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
686  RegUnits, TII, P, IncrementalUpdate);
687 
688  // Eagerly stop after the first pattern fires.
689  Changed = true;
690  break;
691  }
692  // Cleanup instructions of the alternative code sequence. There is no
693  // use for them.
694  MachineFunction *MF = MBB->getParent();
695  for (auto *InstrPtr : InsInstrs)
696  MF->DeleteMachineInstr(InstrPtr);
697  }
698  InstrIdxForVirtReg.clear();
699  }
700  }
701 
702  if (Changed && IncrementalUpdate)
703  Traces->invalidate(MBB);
704  return Changed;
705 }
706 
707 bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
708  STI = &MF.getSubtarget();
709  TII = STI->getInstrInfo();
710  TRI = STI->getRegisterInfo();
711  SchedModel = STI->getSchedModel();
712  TSchedModel.init(STI);
713  MRI = &MF.getRegInfo();
714  MLI = &getAnalysis<MachineLoopInfo>();
715  Traces = &getAnalysis<MachineTraceMetrics>();
716  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
717  MBFI = (PSI && PSI->hasProfileSummary()) ?
718  &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
719  nullptr;
720  MinInstr = nullptr;
721  OptSize = MF.getFunction().hasOptSize();
722  RegClassInfo.runOnMachineFunction(MF);
723 
724  LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
725  if (!TII->useMachineCombiner()) {
726  LLVM_DEBUG(
727  dbgs()
728  << " Skipping pass: Target does not support machine combiner\n");
729  return false;
730  }
731 
732  bool Changed = false;
733 
734  // Try to combine instructions.
735  for (auto &MBB : MF)
736  Changed |= combineInstructions(&MBB);
737 
738  return Changed;
739 }
i
i
Definition: README.txt:29
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
llvm
Definition: AllocatorList.h:23
llvm::MachineTraceMetrics::InstrCycles::Depth
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
Definition: MachineTraceMetrics.h:245
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::MachineTraceMetrics::Ensemble::invalidate
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
Definition: MachineTraceMetrics.cpp:523
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:411
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineSizeOpts.h
RegisterClassInfo.h
llvm::SparseSet::begin
const_iterator begin() const
Definition: SparseSet.h:174
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::MachineTraceMetrics::TS_MinInstrCount
@ TS_MinInstrCount
Select the trace through a block that has the fewest instructions.
Definition: MachineTraceMetrics.h:379
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:231
insertDeleteInstructions
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, SmallVector< MachineInstr *, 16 > InsInstrs, SmallVector< MachineInstr *, 16 > DelInstrs, MachineTraceMetrics::Ensemble *MinInstr, SparseSet< LiveRegUnit > &RegUnits, const TargetInstrInfo *TII, MachineCombinerPattern Pattern, bool IncrementalUpdate)
Inserts InsInstrs and deletes DelInstrs.
Definition: MachineCombiner.cpp:467
DenseMap.h
TargetInstrInfo.h
llvm::MachineCombinerPattern::REASSOC_XY_BAC
@ REASSOC_XY_BAC
CombinerObjective::MustReduceRegisterPressure
@ MustReduceRegisterPressure
llvm::MachineRegisterInfo::reg_end
static reg_iterator reg_end()
Definition: MachineRegisterInfo.h:284
llvm::MCRegisterInfo::getNumRegUnits
unsigned getNumRegUnits() const
Return the number of (native) register units in the target.
Definition: MCRegisterInfo.h:505
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
MachineRegisterInfo.h
MachineTraceMetrics.h
llvm::MachineCombinerPattern::REASSOC_AX_YB
@ REASSOC_AX_YB
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::MachineTraceMetrics::Trace::getInstrCycles
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
Definition: MachineTraceMetrics.h:298
CommandLine.h
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:568
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
llvm::MachineTraceMetrics::Ensemble::updateDepth
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
Definition: MachineTraceMetrics.cpp:782
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
MachineLoopInfo.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PPCISD::SC
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Definition: PPCISelLowering.h:410
llvm::MachineBlockFrequencyInfo
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
Definition: MachineBlockFrequencyInfo.h:33
VerifyPatternOrder
static cl::opt< bool > VerifyPatternOrder("machine-combiner-verify-pattern-order", cl::Hidden, cl::desc("Verify that the generated patterns are ordered by increasing latency"), cl::init(false))
llvm::SparseSet::end
const_iterator end() const
Definition: SparseSet.h:175
llvm::MachineCombinerPattern::REASSOC_XMM_AMM_BMM
@ REASSOC_XMM_AMM_BMM
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:370
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:110
llvm::MachineFunction::DeleteMachineInstr
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
Definition: MachineFunction.cpp:393
getLatency
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst)
Gets latency information for Inst, based on DC information.
Definition: Disassembler.cpp:195
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::MachineTraceMetrics::Trace
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
Definition: MachineTraceMetrics.h:255
CombinerObjective::Default
@ Default
llvm::MachineOperand::getParent
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Definition: MachineOperand.h:238
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::MachineTraceMetrics::Trace::isDepInTrace
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
Definition: MachineTraceMetrics.cpp:1278
Passes.h
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:558
llvm::cl::opt
Definition: CommandLine.h:1422
CombinerObjective
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
Definition: MachineCombiner.cpp:265
LazyMachineBlockFrequencyInfo.h
===- LazyMachineBlockFrequencyInfo.h - Lazy Block Frequency -*- C++ -*–===//
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
TargetSchedule.h
inc_threshold
static cl::opt< unsigned > inc_threshold("machine-combiner-inc-threshold", cl::Hidden, cl::desc("Incremental depth computation will be used for basic " "blocks with more instructions."), cl::init(500))
llvm::DenseMapBase::clear
void clear()
Definition: DenseMap.h:111
llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:31
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
ProfileSummaryInfo.h
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::DenseMap< unsigned, unsigned >
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:185
MachineFunctionPass.h
llvm::SparseSet::setUniverse
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:155
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:522
llvm::initializeMachineCombinerPass
void initializeMachineCombinerPass(PassRegistry &)
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::size
unsigned size() const
Definition: MachineBasicBlock.h:239
llvm::RegisterClassInfo
Definition: RegisterClassInfo.h:30
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::MachineTraceMetrics::Trace::getResourceLength
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks=None, ArrayRef< const MCSchedClassDesc * > ExtraInstrs=None, ArrayRef< const MCSchedClassDesc * > RemoveInstrs=None) const
Return the resource length of the trace.
Definition: MachineTraceMetrics.cpp:1224
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1243
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::MachineCombinerPattern::REASSOC_AX_BY
@ REASSOC_AX_BY
llvm::LazyMachineBlockFrequencyInfoPass
This is an alternative analysis pass to MachineBlockFrequencyInfo.
Definition: LazyMachineBlockFrequencyInfo.h:37
llvm::MachineCombinerPattern::REASSOC_XY_BCA
@ REASSOC_XY_BCA
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
TargetSubtargetInfo.h
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:375
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:716
getCombinerObjective
static CombinerObjective getCombinerObjective(MachineCombinerPattern P)
Definition: MachineCombiner.cpp:271
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner", false, false) INITIALIZE_PASS_END(MachineCombiner
llvm::MachineRegisterInfo::reg_begin
reg_iterator reg_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:281
llvm::MachineTraceMetrics::Ensemble
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
Definition: MachineTraceMetrics.h:321
llvm::MachineCombinerPattern::REASSOC_XY_AMM_BMM
@ REASSOC_XY_AMM_BMM
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:524
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1337
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MachineCombiner.cpp:37
llvm::SparseSet::erase
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:288
llvm::MCSchedModel
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:245
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:474
dump_intrs
static cl::opt< bool > dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden, cl::desc("Dump all substituted intrs"), cl::init(false))
llvm::Pattern
Definition: FileCheckImpl.h:614
llvm::MachineInstr::findRegisterDefOperandIdx
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
Definition: MachineInstr.cpp:1041
Machine
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:365
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
llvm::MachineTraceMetrics::Trace::getInstrSlack
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
Definition: MachineTraceMetrics.cpp:1174
llvm::MachineCombinerPattern::REASSOC_XA_BY
@ REASSOC_XA_BY
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
llvm::MachineCombinerID
char & MachineCombinerID
This pass performs instruction combining using trace metrics to estimate critical-path and resource d...
Definition: MachineCombiner.cpp:130
llvm::MachineRegisterInfo::defusechain_iterator
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
Definition: MachineRegisterInfo.h:266
CombinerObjective::MustReduceDepth
@ MustReduceDepth
llvm::SmallVectorImpl< MachineInstr * >
llvm::SparseSet
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition: SparseSet.h:123
llvm::MachineCombinerPattern::REASSOC_XA_YB
@ REASSOC_XA_YB
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:45
MachineFunction.h
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::MachineTraceMetrics
Definition: MachineTraceMetrics.h:87
InitializePasses.h
llvm::MachineInstr::operands
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:607
llvm::MachineInstr::findRegisterUseOperandIdx
int findRegisterUseOperandIdx(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
Definition: MachineInstr.cpp:992
TargetRegisterInfo.h
llvm::MachineBasicBlock::getName
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Definition: MachineBasicBlock.cpp:313
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
MachineDominators.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::MachineCombinerPattern
MachineCombinerPattern
These are instruction patterns matched by the machine combiner pass.
Definition: MachineCombinerPattern.h:20