//====- X86CmovConversion.cpp - Convert Cmov to Branch --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements a pass that converts X86 cmov instructions into
/// branches when profitable. This pass is conservative. It transforms if and
/// only if it can guarantee a gain with high confidence.
///
/// Thus, the optimization applies under the following conditions:
///   1. Consider as candidates only CMOVs in innermost loops (assume that
///      most hotspots are represented by these loops).
///   2. Given a group of CMOV instructions that use the same EFLAGS def
///      instruction:
///      a. Consider them as candidates only if all have the same condition
///         code or the opposite one, to prevent generating more than one
///         conditional jump per EFLAGS def instruction.
///      b. Consider them as candidates only if all are profitable to be
///         converted (assume that one bad conversion may cause a degradation).
///   3. Apply conversion only for loops that are found profitable and only for
///      CMOV candidates that were found profitable.
///      a. A loop is considered profitable only if conversion will reduce its
///         depth cost by some threshold.
///      b. A CMOV is considered profitable if the cost of its condition is
///         higher than the average cost of its true-value and false-value by
///         25% of the branch-misprediction-penalty. This assures no
///         degradation even with 25% branch misprediction.
///
/// Note: This pass is assumed to run on SSA machine code.
//
//===----------------------------------------------------------------------===//
//
//  External interfaces:
//      FunctionPass *llvm::createX86CmovConverterPass();
//      bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF);
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "x86-cmov-conversion"

STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups");
STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");

// This internal switch can be used to turn off the cmov/branch optimization.
static cl::opt<bool>
    EnableCmovConverter("x86-cmov-converter",
                        cl::desc("Enable the X86 cmov-to-branch optimization."),
                        cl::init(true), cl::Hidden);

static cl::opt<unsigned>
    GainCycleThreshold("x86-cmov-converter-threshold",
                       cl::desc("Minimum gain per loop (in cycles) threshold."),
                       cl::init(4), cl::Hidden);

static cl::opt<bool> ForceMemOperand(
    "x86-cmov-converter-force-mem-operand",
    cl::desc("Convert cmovs to branches whenever they have memory operands."),
    cl::init(true), cl::Hidden);
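
// These hidden flags can be toggled from the llc command line when
// experimenting with this pass; for example (illustrative invocations):
//   llc -x86-cmov-converter=false foo.ll
//   llc -x86-cmov-converter-threshold=8 foo.ll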

namespace {

/// Converts X86 cmov instructions into branches when profitable.
class X86CmovConverterPass : public MachineFunctionPass {
public:
  X86CmovConverterPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 cmov Conversion"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  MachineRegisterInfo *MRI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  TargetSchedModel TSchedModel;

  /// List of consecutive CMOV instructions.
  using CmovGroup = SmallVector<MachineInstr *, 2>;
  using CmovGroups = SmallVector<CmovGroup, 2>;

  /// Collect all CMOV-group-candidates in \p CurrLoop and update \p
  /// CmovInstGroups accordingly.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop.
  /// \returns true iff it found any CMOV-group-candidate.
  bool collectCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                             CmovGroups &CmovInstGroups,
                             bool IncludeLoads = false);

  /// Check whether it is profitable to transform each CMOV-group-candidate
  /// into a branch. Remove all groups that are not profitable from \p
  /// CmovInstGroups.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop.
  /// \returns true iff any CMOV-group-candidates remain.
  bool checkForProfitableCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                                        CmovGroups &CmovInstGroups);

  /// Convert the given list of consecutive CMOV instructions into a branch.
  ///
  /// \param Group Consecutive CMOV instructions to be converted into branch.
  void convertCmovInstsToBranches(SmallVectorImpl<MachineInstr *> &Group) const;
};

} // end anonymous namespace

char X86CmovConverterPass::ID = 0;

void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MachineLoopInfo>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
  if (!EnableCmovConverter)
    return false;

  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << "**********\n");

  bool Changed = false;
  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  MRI = &MF.getRegInfo();
  TII = STI.getInstrInfo();
  TRI = STI.getRegisterInfo();
  TSchedModel.init(&STI);

  // Before we handle the more subtle cases of register-register CMOVs inside
  // of potentially hot loops, we want to quickly remove all CMOVs with a
  // memory operand. The CMOV will risk a stall while waiting for the load to
  // complete, a situation that speculative execution behind a branch is
  // better suited to handle on modern x86 chips.
  if (ForceMemOperand) {
    CmovGroups AllCmovGroups;
    SmallVector<MachineBasicBlock *, 4> Blocks;
    for (auto &MBB : MF)
      Blocks.push_back(&MBB);
    if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads*/ true)) {
      for (auto &Group : AllCmovGroups) {
        // Skip any group that doesn't do at least one memory operand cmov.
        if (!llvm::any_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
          continue;

        // For CMOV groups which we can rewrite and which contain a memory
        // load, always rewrite them. On x86, a CMOV will dramatically amplify
        // any memory latency by blocking speculative execution.
        Changed = true;
        convertCmovInstsToBranches(Group);
      }
    }
  }

  //===--------------------------------------------------------------------===//
  // Register-operand Conversion Algorithm
  // ---------
  //   For each innermost loop
  //     collectCmovCandidates() {
  //       Find all CMOV-group-candidates.
  //     }
  //
  //     checkForProfitableCmovCandidates() {
  //       * Calculate both loop-depth and optimized-loop-depth.
  //       * Use these depths to check for loop transformation profitability.
  //       * Check for CMOV-group-candidate transformation profitability.
  //     }
  //
  //     For each profitable CMOV-group-candidate
  //       convertCmovInstsToBranches() {
  //         * Create FalseBB, SinkBB, Conditional branch to SinkBB.
  //         * Replace each CMOV instruction with a PHI instruction in SinkBB.
  //       }
  //
  // Note: For more details, see each function description.
  //===--------------------------------------------------------------------===//

  // Build up the loops in pre-order.
  SmallVector<MachineLoop *, 4> Loops(MLI.begin(), MLI.end());
  // Note that we need to check size on each iteration as we accumulate child
  // loops.
  for (int i = 0; i < (int)Loops.size(); ++i)
    for (MachineLoop *Child : Loops[i]->getSubLoops())
      Loops.push_back(Child);

  for (MachineLoop *CurrLoop : Loops) {
    // Optimize only innermost loops.
    if (!CurrLoop->getSubLoops().empty())
      continue;

    // List of consecutive CMOV instructions to be processed.
    CmovGroups CmovInstGroups;

    if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups))
      continue;

    if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(),
                                          CmovInstGroups))
      continue;

    Changed = true;
    for (auto &Group : CmovInstGroups)
      convertCmovInstsToBranches(Group);
  }

  return Changed;
}
253 
254 bool X86CmovConverterPass::collectCmovCandidates(
255  ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups,
256  bool IncludeLoads) {
257  //===--------------------------------------------------------------------===//
258  // Collect all CMOV-group-candidates and add them into CmovInstGroups.
259  //
260  // CMOV-group:
261  // CMOV instructions, in same MBB, that uses same EFLAGS def instruction.
262  //
263  // CMOV-group-candidate:
264  // CMOV-group where all the CMOV instructions are
265  // 1. consecutive.
266  // 2. have same condition code or opposite one.
267  // 3. have only operand registers (X86::CMOVrr).
268  //===--------------------------------------------------------------------===//
269  // List of possible improvement (TODO's):
270  // --------------------------------------
271  // TODO: Add support for X86::CMOVrm instructions.
272  // TODO: Add support for X86::SETcc instructions.
273  // TODO: Add support for CMOV-groups with non consecutive CMOV instructions.
274  //===--------------------------------------------------------------------===//
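
  // For example, the following pair forms a single CMOV-group-candidate,
  // since both CMOVs consume the same EFLAGS def and use opposite condition
  // codes (illustrative pseudo-code, in the style of the comments below):
  //   cond = cmp a, b
  //   v1 = CMOVge t1, f1, cond
  //   v2 = CMOVlt t2, f2, cond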

  // Currently processed CMOV-group.
  CmovGroup Group;
  for (auto *MBB : Blocks) {
    Group.clear();
    // Condition code of the first CMOV instruction in the currently processed
    // range, and its opposite condition code.
    X86::CondCode FirstCC = X86::COND_INVALID, FirstOppCC = X86::COND_INVALID,
                  MemOpCC = X86::COND_INVALID;
    // Indicator of a non-CMOVrr instruction in the currently processed range.
    bool FoundNonCMOVInst = false;
    // Indicator for whether the currently processed CMOV-group should be
    // skipped.
    bool SkipGroup = false;

    for (auto &I : *MBB) {
      // Skip debug instructions.
      if (I.isDebugInstr())
        continue;
      X86::CondCode CC = X86::getCondFromCMov(I);
      // Check if we found a X86::CMOVrr instruction.
      if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) {
        if (Group.empty()) {
          // We found the first CMOV in the range; reset the flags.
          FirstCC = CC;
          FirstOppCC = X86::GetOppositeBranchCondition(CC);
          // Clear out the prior group's memory operand CC.
          MemOpCC = X86::COND_INVALID;
          FoundNonCMOVInst = false;
          SkipGroup = false;
        }
        Group.push_back(&I);
        // Check if it is a non-consecutive CMOV instruction or has a
        // different condition code than FirstCC or FirstOppCC.
        if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC))
          // Mark the SkipGroup indicator to skip the current CMOV-group.
          SkipGroup = true;
        if (I.mayLoad()) {
          if (MemOpCC == X86::COND_INVALID)
            // The first memory operand CMOV.
            MemOpCC = CC;
          else if (CC != MemOpCC)
            // Can't handle mixed conditions with memory operands.
            SkipGroup = true;
        }
        // Check if we were relying on zero-extending behavior of the CMOV.
        if (!SkipGroup &&
            llvm::any_of(
                MRI->use_nodbg_instructions(I.defs().begin()->getReg()),
                [&](MachineInstr &UseI) {
                  return UseI.getOpcode() == X86::SUBREG_TO_REG;
                }))
          // FIXME: We should model the cost of using an explicit MOV to handle
          // the zero-extension rather than just refusing to handle this.
          SkipGroup = true;
        continue;
      }
      // If Group is empty, keep looking for the first CMOV in the range.
      if (Group.empty())
        continue;

      // We found a non-X86::CMOVrr instruction.
      FoundNonCMOVInst = true;
      // Check if this instruction defines EFLAGS to determine the end of the
      // processed range, as there would be no more instructions using the
      // current EFLAGS def.
      if (I.definesRegister(X86::EFLAGS)) {
        // Check if the currently processed CMOV-group should not be skipped
        // and, if so, add it as a CMOV-group-candidate.
        if (!SkipGroup)
          CmovInstGroups.push_back(Group);
        else
          ++NumOfSkippedCmovGroups;
        Group.clear();
      }
    }
    // The end of the basic block is considered the end of the range; check if
    // the currently processed CMOV-group should not be skipped and, if so,
    // add it as a CMOV-group-candidate.
    if (Group.empty())
      continue;
    if (!SkipGroup)
      CmovInstGroups.push_back(Group);
    else
      ++NumOfSkippedCmovGroups;
  }

  NumOfCmovGroupCandidate += CmovInstGroups.size();
  return !CmovInstGroups.empty();
}

/// \returns Depth of CMOV instruction as if it was converted into branch.
/// \param TrueOpDepth depth cost of CMOV true value operand.
/// \param FalseOpDepth depth cost of CMOV false value operand.
static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
  // The depth of the result after branch conversion is
  // TrueOpDepth * TrueOpProbability + FalseOpDepth * FalseOpProbability.
  // As we have no info about branch weight, we assume 75% for one and 25% for
  // the other, and pick the result with the largest resulting depth.
  return std::max(
      divideCeil(TrueOpDepth * 3 + FalseOpDepth, 4),
      divideCeil(FalseOpDepth * 3 + TrueOpDepth, 4));
}
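
// For example (hypothetical depths): TrueOpDepth = 8 and FalseOpDepth = 2
// yield max(divideCeil(8 * 3 + 2, 4), divideCeil(2 * 3 + 8, 4)) =
// max(7, 4) = 7, i.e. the more pessimistic 75%-weighted estimate.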

bool X86CmovConverterPass::checkForProfitableCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) {
  struct DepthInfo {
    /// Depth of original loop.
    unsigned Depth;
    /// Depth of optimized loop.
    unsigned OptDepth;
  };
  /// Number of loop iterations for which to calculate instruction depth.
  static const unsigned LoopIterations = 2;
  DenseMap<MachineInstr *, DepthInfo> DepthMap;
  DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
  enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
  /// For each register type, maps the register to its last def instruction.
  DenseMap<unsigned, MachineInstr *> RegDefMaps[RegTypeNum];
  /// Maps register operand to its def instruction, which can be nullptr if it
  /// is unknown (e.g., operand is defined outside the loop).
  DenseMap<MachineOperand *, MachineInstr *> OperandToDefMap;

  // Set depth of unknown instruction (i.e., nullptr) to zero.
  DepthMap[nullptr] = {0, 0};

  SmallPtrSet<MachineInstr *, 4> CmovInstructions;
  for (auto &Group : CmovInstGroups)
    CmovInstructions.insert(Group.begin(), Group.end());

  //===--------------------------------------------------------------------===//
  // Step 1: Calculate instruction depth and loop depth.
  // Optimized-Loop:
  //   loop with CMOV-group-candidates converted into branches.
  //
  // Instruction-Depth:
  //   instruction latency + max operand depth.
  //   * For a CMOV instruction in the optimized loop the depth is calculated
  //     as:
  //       CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-Depth)
  //   TODO: Find a better way to estimate the latency of the branch
  //   instruction rather than using the CMOV latency.
  //
  // Loop-Depth:
  //   max instruction depth of all instructions in the loop.
  //   Note: the instruction with max depth represents the critical-path in
  //   the loop.
  //
  // Loop-Depth[i]:
  //   Loop-Depth calculated for the first `i` iterations.
  //   Note: it is enough to calculate depth for up to two iterations.
  //
  // Depth-Diff[i]:
  //   Number of cycles saved in the first `i` iterations by optimizing the
  //   loop.
  //===--------------------------------------------------------------------===//
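  // For example (hypothetical latencies): if operand %a has depth 3 and
  // operand %b has depth 5, then "%c = ADD %a, %b" with latency 1 gets
  // depth max(3, 5) + 1 = 6.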
  for (unsigned I = 0; I < LoopIterations; ++I) {
    DepthInfo &MaxDepth = LoopDepth[I];
    for (auto *MBB : Blocks) {
      // Clear physical registers Def map.
      RegDefMaps[PhyRegType].clear();
      for (MachineInstr &MI : *MBB) {
        // Skip debug instructions.
        if (MI.isDebugInstr())
          continue;
        unsigned MIDepth = 0;
        unsigned MIDepthOpt = 0;
        bool IsCMOV = CmovInstructions.count(&MI);
        for (auto &MO : MI.uses()) {
          // Check for "isUse()" because "uses()" also returns implicit
          // definitions.
          if (!MO.isReg() || !MO.isUse())
            continue;
          Register Reg = MO.getReg();
          auto &RDM = RegDefMaps[Reg.isVirtual()];
          if (MachineInstr *DefMI = RDM.lookup(Reg)) {
            OperandToDefMap[&MO] = DefMI;
            DepthInfo Info = DepthMap.lookup(DefMI);
            MIDepth = std::max(MIDepth, Info.Depth);
            if (!IsCMOV)
              MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth);
          }
        }

        if (IsCMOV)
          MIDepthOpt = getDepthOfOptCmov(
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth,
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth);

        // Iterate over all operands to handle implicit definitions as well.
        for (auto &MO : MI.operands()) {
          if (!MO.isReg() || !MO.isDef())
            continue;
          Register Reg = MO.getReg();
          RegDefMaps[Reg.isVirtual()][Reg] = &MI;
        }

        unsigned Latency = TSchedModel.computeInstrLatency(&MI);
        DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency};
        MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth);
        MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt);
      }
    }
  }

  unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
                                   LoopDepth[1].Depth - LoopDepth[1].OptDepth};

  //===--------------------------------------------------------------------===//
  // Step 2: Check if the loop is worth optimizing.
  // Worth-Optimize-Loop:
  //   case 1: Diff[1] == Diff[0]
  //           The critical-path is iteration independent - there is no
  //           dependency of critical-path instructions on critical-path
  //           instructions of the previous iteration.
  //           Thus, it is enough to check the gain percent of the 1st
  //           iteration - to be conservative, the optimized loop needs to
  //           have a depth that is 12.5% fewer cycles than the original
  //           loop's, per iteration.
  //
  //   case 2: Diff[1] > Diff[0]
  //           The critical-path is iteration dependent - there is a
  //           dependency of critical-path instructions on critical-path
  //           instructions of the previous iteration.
  //           Thus, check the gain percent of the 2nd iteration (similar to
  //           the previous case), but it is also required to check the
  //           gradient of the gain - the change in Depth-Diff compared to the
  //           change in Loop-Depth between the 1st and 2nd iterations.
  //           To be conservative, the gradient needs to be at least 50%.
  //
  //   In addition, in order not to optimize loops with a very small gain, the
  //   gain (in cycles) after the 2nd iteration should not be less than a
  //   given threshold. Thus, the check (Diff[1] >= GainCycleThreshold) must
  //   apply.
  //
  // If the loop is not worth optimizing, remove all CMOV-group-candidates.
  //===--------------------------------------------------------------------===//
  if (Diff[1] < GainCycleThreshold)
    return false;

  bool WorthOptLoop = false;
  if (Diff[1] == Diff[0])
    WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
  else if (Diff[1] > Diff[0])
    WorthOptLoop =
        (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) &&
        (Diff[1] * 8 >= LoopDepth[1].Depth);

  if (!WorthOptLoop)
    return false;
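
  // For example (hypothetical depths): LoopDepth[0] = {24, 20} and
  // LoopDepth[1] = {46, 36} give Diff = {4, 10}; the gradient check
  // (10 - 4) * 2 = 12 >= 46 - 24 = 22 fails, so the loop is rejected.
  // With LoopDepth[1] = {36, 26} instead, 12 >= 36 - 24 = 12 and
  // 10 * 8 = 80 >= 36 both hold, so the loop is accepted.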

  ++NumOfLoopCandidate;

  //===--------------------------------------------------------------------===//
  // Step 3: Check for each CMOV-group-candidate whether it is worth
  // optimizing.
  // Worth-Optimize-Group:
  //   Iff it is worth optimizing all CMOV instructions in the group.
  //
  // Worth-Optimize-CMOV:
  //   A predicted branch is faster than CMOV by the difference between the
  //   depth of the condition operand and the depth of the taken (predicted)
  //   value operand.
  //   To be conservative, the gain of such a CMOV transformation should cover
  //   at least 25% of the branch-misprediction-penalty.
  //===--------------------------------------------------------------------===//
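  // For example (hypothetical costs): with MispredictPenalty = 20 cycles,
  // CondCost = 10 and ValCost = 4 give (10 - 4) * 4 = 24 >= 20, so the CMOV
  // is worth converting; with ValCost = 6, (10 - 6) * 4 = 16 < 20 and the
  // group is kept as CMOV.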
  unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
  CmovGroups TempGroups;
  std::swap(TempGroups, CmovInstGroups);
  for (auto &Group : TempGroups) {
    bool WorthOpGroup = true;
    for (auto *MI : Group) {
      // Avoid CMOV instructions whose value is used as a pointer to load
      // from. This is another conservative check, to avoid converting a CMOV
      // instruction used in a tree-search-like algorithm, where the branch is
      // unpredictable.
      auto UIs = MRI->use_instructions(MI->defs().begin()->getReg());
      if (!UIs.empty() && ++UIs.begin() == UIs.end()) {
        unsigned Op = UIs.begin()->getOpcode();
        if (Op == X86::MOV64rm || Op == X86::MOV32rm) {
          WorthOpGroup = false;
          break;
        }
      }

      unsigned CondCost =
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth;
      unsigned ValCost = getDepthOfOptCmov(
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
      if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
        WorthOpGroup = false;
        break;
      }
    }

    if (WorthOpGroup)
      CmovInstGroups.push_back(Group);
  }

  return !CmovInstGroups.empty();
}

static bool checkEFLAGSLive(MachineInstr *MI) {
  if (MI->killsRegister(X86::EFLAGS))
    return false;

  // The EFLAGS operand of MI might be missing a kill marker.
  // Figure out whether EFLAGS should be live after the MI instruction.
  MachineBasicBlock *BB = MI->getParent();
  MachineBasicBlock::iterator ItrMI = MI;

  // Scan forward through BB for a use/def of EFLAGS.
  for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) {
    if (I->readsRegister(X86::EFLAGS))
      return true;
    if (I->definesRegister(X86::EFLAGS))
      return false;
  }

  // We hit the end of the block, check whether EFLAGS is live into a successor.
  for (auto I = BB->succ_begin(), E = BB->succ_end(); I != E; ++I) {
    if ((*I)->isLiveIn(X86::EFLAGS))
      return true;
  }

  return false;
}
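
// For example (illustrative pseudo-code), EFLAGS is live after MI when a
// later instruction in the block reads it before any redefinition:
//   %v = CMOV...              ; MI
//   JCC %bb, cond             ; reads EFLAGS -> checkEFLAGSLive(MI) == true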

/// Given \p First CMOV instruction and \p Last CMOV instruction representing a
/// group of CMOV instructions, which may contain debug instructions in between,
/// move all debug instructions to after the last CMOV instruction, making the
/// CMOV group consecutive.
static void packCmovGroup(MachineInstr *First, MachineInstr *Last) {
  assert(X86::getCondFromCMov(*Last) != X86::COND_INVALID &&
         "Last instruction in a CMOV group must be a CMOV instruction");

  SmallVector<MachineInstr *, 2> DBGInstructions;
  for (auto I = First->getIterator(), E = Last->getIterator(); I != E; I++) {
    if (I->isDebugInstr())
      DBGInstructions.push_back(&*I);
  }

  // Splice the debug instructions after the cmov group.
  MachineBasicBlock *MBB = First->getParent();
  for (auto *MI : DBGInstructions)
    MBB->insertAfter(Last, MI->removeFromParent());
}

void X86CmovConverterPass::convertCmovInstsToBranches(
    SmallVectorImpl<MachineInstr *> &Group) const {
  assert(!Group.empty() && "No CMOV instructions to convert");
  ++NumOfOptimizedCmovGroups;

  // If the CMOV group is not packed, e.g., there are debug instructions
  // between the first CMOV and the last CMOV, then pack the group and make
  // the CMOV instructions consecutive by moving the debug instructions to
  // after the last CMOV.
  packCmovGroup(Group.front(), Group.back());

  // To convert a CMOVcc instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.

  // Before
  // -----
  // MBB:
  //   cond = cmp ...
  //   v1 = CMOVge t1, f1, cond
  //   v2 = CMOVlt t2, f2, cond
  //   v3 = CMOVge v1, f3, cond
  //
  // After
  // -----
  // MBB:
  //   cond = cmp ...
  //   jge %SinkMBB
  //
  // FalseMBB:
  //   jmp %SinkMBB
  //
  // SinkMBB:
  //   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
  //   %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
  //                                          ; true-value with false-value
  //   %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use
  //                                          ; previous Phi instruction result

  MachineInstr &MI = *Group.front();
  MachineInstr *LastCMOV = Group.back();
  DebugLoc DL = MI.getDebugLoc();

  X86::CondCode CC = X86::CondCode(X86::getCondFromCMov(MI));
  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
  // Potentially swap the condition codes so that any memory operand to a CMOV
  // is in the *false* position instead of the *true* position. We can invert
  // any non-memory operand CMOV instructions to cope with this and we ensure
  // memory operand CMOVs are only included with a single condition code.
  if (llvm::any_of(Group, [&](MachineInstr *I) {
        return I->mayLoad() && X86::getCondFromCMov(*I) == CC;
      }))
    std::swap(CC, OppCC);

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction::iterator It = ++MBB->getIterator();
  MachineFunction *F = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();

  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB);
  F->insert(It, FalseMBB);
  F->insert(It, SinkMBB);

  // If the EFLAGS register isn't dead in the terminator, then claim that it's
  // live into the sink and copy blocks.
  if (checkEFLAGSLive(LastCMOV)) {
    FalseMBB->addLiveIn(X86::EFLAGS);
    SinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Transfer the remainder of BB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Add the false and sink blocks as its successors.
  MBB->addSuccessor(FalseMBB);
  MBB->addSuccessor(SinkMBB);

  // Create the conditional branch instruction.
  BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);

  // Add the sink block to the false block successors.
  FalseMBB->addSuccessor(SinkMBB);

  MachineInstrBuilder MIB;
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd =
      std::next(MachineBasicBlock::iterator(LastCMOV));
  MachineBasicBlock::iterator FalseInsertionPoint = FalseMBB->begin();
  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // First we need to insert an explicit load on the false path for any memory
  // operand. We also need to potentially do register rewriting here, but it is
  // simpler as the memory operands are always on the false path so we can
  // simply take that input, whatever it is.
  DenseMap<unsigned, unsigned> FalseBBRegRewriteTable;
  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;) {
    auto &MI = *MIIt++;
    // Skip any CMOVs in this group which don't load from memory.
    if (!MI.mayLoad()) {
      // Remember the false-side register input.
      Register FalseReg =
          MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg();
      // Walk back through any intermediate cmovs referenced.
      while (true) {
        auto FRIt = FalseBBRegRewriteTable.find(FalseReg);
        if (FRIt == FalseBBRegRewriteTable.end())
          break;
        FalseReg = FRIt->second;
      }
      FalseBBRegRewriteTable[MI.getOperand(0).getReg()] = FalseReg;
      continue;
    }

    // The condition must be the *opposite* of the one we've decided to branch
    // on as the branch will go *around* the load and the load should happen
    // when the CMOV condition is false.
    assert(X86::getCondFromCMov(MI) == OppCC &&
           "Can only handle memory-operand cmov instructions with a condition "
           "opposite to the selected branch direction.");

    // The goal is to rewrite the cmov from:
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), (mem)
    //
    // to
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), %C
    //   FalseMBB:
    //     %C = MOV (mem)
    //
    // Which will allow the next loop to rewrite the CMOV in terms of a PHI:
    //
    //   MBB:
    //     JMP!cc SinkMBB
    //   FalseMBB:
    //     %C = MOV (mem)
    //   SinkMBB:
    //     %A = PHI [ %C, FalseMBB ], [ %B, MBB]

    // Get a fresh register to use as the destination of the MOV.
    const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg());
    Register TmpReg = MRI->createVirtualRegister(RC);

    SmallVector<MachineInstr *, 4> NewMIs;
    bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg,
                                             /*UnfoldLoad*/ true,
                                             /*UnfoldStore*/ false, NewMIs);
    (void)Unfolded;
    assert(Unfolded && "Should never fail to unfold a loading cmov!");

    // Move the new CMOV to just before the old one and reset any impacted
    // iterator.
    auto *NewCMOV = NewMIs.pop_back_val();
    assert(X86::getCondFromCMov(*NewCMOV) == OppCC &&
           "Last new instruction isn't the expected CMOV!");
    LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump());
    MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV);
    if (&*MIItBegin == &MI)
      MIItBegin = MachineBasicBlock::iterator(NewCMOV);

    // Sink whatever instructions were needed to produce the unfolded operand
    // into the false block.
    for (auto *NewMI : NewMIs) {
      LLVM_DEBUG(dbgs() << "\tRewritten load instr: "; NewMI->dump());
      FalseMBB->insert(FalseInsertionPoint, NewMI);
      // Re-map any operands that are from other cmovs to the inputs for this
      // block.
      for (auto &MOp : NewMI->uses()) {
        if (!MOp.isReg())
          continue;
        auto It = FalseBBRegRewriteTable.find(MOp.getReg());
        if (It == FalseBBRegRewriteTable.end())
          continue;

        MOp.setReg(It->second);
        // This might have been a kill when it referenced the cmov result, but
        // it won't necessarily be once rewritten.
        // FIXME: We could potentially improve this by tracking whether the
        // operand to the cmov was also a kill, and then skipping the PHI node
        // construction below.
        MOp.setIsKill(false);
      }
    }
    MBB->erase(MachineBasicBlock::iterator(MI),
               std::next(MachineBasicBlock::iterator(MI)));

    // Add this PHI to the rewrite table.
    FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
  }

  // As we are creating the PHIs, we have to be careful if there is more than
  // one. Later CMOVs may reference the results of earlier CMOVs, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from earlier PHI's
  // destination registers, and the registers that went into the PHI.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    Register DestReg = MIIt->getOperand(0).getReg();
    Register Op1Reg = MIIt->getOperand(1).getReg();
    Register Op2Reg = MIIt->getOperand(2).getReg();

    // If this CMOV we are processing is the opposite condition from the jump
    // we generated, then we have to swap the operands for the PHI that is
    // going to be generated.
    if (X86::getCondFromCMov(*MIIt) == OppCC)
      std::swap(Op1Reg, Op2Reg);

    auto Op1Itr = RegRewriteTable.find(Op1Reg);
    if (Op1Itr != RegRewriteTable.end())
      Op1Reg = Op1Itr->second.first;

    auto Op2Itr = RegRewriteTable.find(Op2Reg);
    if (Op2Itr != RegRewriteTable.end())
      Op2Reg = Op2Itr->second.second;

    //  SinkMBB:
    //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ]
    //  ...
    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(MBB);
    (void)MIB;
    LLVM_DEBUG(dbgs() << "\tFrom: "; MIIt->dump());
    LLVM_DEBUG(dbgs() << "\tTo: "; MIB->dump());

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
  }

  // Now remove the CMOV(s).
  MBB->erase(MIItBegin, MIItEnd);
}

INITIALIZE_PASS_BEGIN(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",
                    false, false)

FunctionPass *llvm::createX86CmovConverterPass() {
  return new X86CmovConverterPass();
}
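
// Typical use: the X86 target adds this pass to its codegen pipeline (as of
// this version, from X86PassConfig::addILPOpts() in X86TargetMachine.cpp):
//   addPass(createX86CmovConverterPass());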