X86CmovConversion.cpp
//====- X86CmovConversion.cpp - Convert Cmov to Branch --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements a pass that converts X86 cmov instructions into
/// branches when profitable. This pass is conservative. It transforms if and
/// only if it can guarantee a gain with high confidence.
///
/// Thus, the optimization applies under the following conditions:
///   1. Consider as candidates only CMOVs in innermost loops (assume that
///      most hotspots are represented by these loops).
///   2. Given a group of CMOV instructions that are using the same EFLAGS def
///      instruction:
///      a. Consider them as candidates only if all have the same condition
///         code or the opposite one, to prevent generating more than one
///         conditional jump per EFLAGS def instruction.
///      b. Consider them as candidates only if all are profitable to be
///         converted (assume that one bad conversion may cause a degradation).
///   3. Apply conversion only for loops that are found profitable and only for
///      CMOV candidates that were found profitable.
///      a. A loop is considered profitable only if conversion will reduce its
///         depth cost by some threshold.
///      b. A CMOV is considered profitable if the cost of its condition is
///         higher than the average cost of its true-value and false-value by
///         25% of the branch-misprediction penalty. This assures no
///         degradation even with 25% branch misprediction.
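///
///      As a worked illustration of 3.b (the numbers are hypothetical): with
///      a 20-cycle misprediction penalty, a CMOV whose condition depth
///      exceeds its predicted-value depth by 6 cycles passes the check,
///      since 6 * 4 = 24 >= 20, while a difference of only 4 cycles fails,
///      since 4 * 4 = 16 < 20 (see checkForProfitableCmovCandidates below).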
///
/// Note: This pass is assumed to run on SSA machine code.
//
//===----------------------------------------------------------------------===//
//
// External interfaces:
//   FunctionPass *llvm::createX86CmovConverterPass();
//   bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF);
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "x86-cmov-conversion"

STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups");
STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");

// This internal switch can be used to turn off the cmov/branch optimization.
static cl::opt<bool>
    EnableCmovConverter("x86-cmov-converter",
                        cl::desc("Enable the X86 cmov-to-branch optimization."),
                        cl::init(true), cl::Hidden);

static cl::opt<unsigned>
    GainCycleThreshold("x86-cmov-converter-threshold",
                       cl::desc("Minimum gain per loop (in cycles) threshold."),
                       cl::init(4), cl::Hidden);

static cl::opt<bool> ForceMemOperand(
    "x86-cmov-converter-force-mem-operand",
    cl::desc("Convert cmovs to branches whenever they have memory operands."),
    cl::init(true), cl::Hidden);
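
// For reference, these cl::opt switches are ordinary (hidden) LLVM
// command-line flags, so the conversion can be toggled from any tool that
// parses them; a hypothetical llc invocation:
//
//   llc -x86-cmov-converter=false test.ll
//   llc -x86-cmov-converter-threshold=8 test.ll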

namespace {

/// Converts X86 cmov instructions into branches when profitable.
class X86CmovConverterPass : public MachineFunctionPass {
public:
  X86CmovConverterPass() : MachineFunctionPass(ID) { }

  StringRef getPassName() const override { return "X86 cmov Conversion"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  MachineRegisterInfo *MRI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineLoopInfo *MLI = nullptr;
  TargetSchedModel TSchedModel;

  /// List of consecutive CMOV instructions.
  using CmovGroup = SmallVector<MachineInstr *, 2>;
  using CmovGroups = SmallVector<CmovGroup, 2>;

  /// Collect all CMOV-group-candidates in \p CurrLoop and update \p
  /// CmovInstGroups accordingly.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop.
  /// \returns true iff it found any CMOV-group-candidate.
  bool collectCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                             CmovGroups &CmovInstGroups,
                             bool IncludeLoads = false);

  /// Check whether it is profitable to transform each CMOV-group-candidate
  /// into a branch. Remove all groups that are not profitable from \p
  /// CmovInstGroups.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop.
  /// \returns true iff any CMOV-group-candidate remains.
  bool checkForProfitableCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                                        CmovGroups &CmovInstGroups);

  /// Convert the given list of consecutive CMOV instructions into a branch.
  ///
  /// \param Group Consecutive CMOV instructions to be converted into a branch.
  void convertCmovInstsToBranches(SmallVectorImpl<MachineInstr *> &Group) const;
};

} // end anonymous namespace

char X86CmovConverterPass::ID = 0;

void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
  AU.addRequired<MachineLoopInfo>();
}

bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
  if (!EnableCmovConverter)
    return false;

  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << "**********\n");

  bool Changed = false;
  MLI = &getAnalysis<MachineLoopInfo>();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  MRI = &MF.getRegInfo();
  TII = STI.getInstrInfo();
  TRI = STI.getRegisterInfo();
  TSchedModel.init(&STI);

  // Before we handle the more subtle cases of register-register CMOVs inside
  // of potentially hot loops, we want to quickly remove all CMOVs with
  // a memory operand. Such a CMOV risks a stall waiting for the load to
  // complete, which speculative execution behind a branch is better suited to
  // handle on modern x86 chips.
  if (ForceMemOperand) {
    CmovGroups AllCmovGroups;
    SmallVector<MachineBasicBlock *, 4> Blocks;
    for (auto &MBB : MF)
      Blocks.push_back(&MBB);
    if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads*/ true)) {
      for (auto &Group : AllCmovGroups) {
        // Skip any group that doesn't do at least one memory operand cmov.
        if (!llvm::any_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
          continue;

        // For CMOV groups which we can rewrite and which contain a memory
        // load, always rewrite them. On x86, a CMOV will dramatically amplify
        // any memory latency by blocking speculative execution.
        Changed = true;
        convertCmovInstsToBranches(Group);
      }
    }
  }

  //===--------------------------------------------------------------------===//
  // Register-operand Conversion Algorithm
  // ---------
  // For each innermost loop
  //   collectCmovCandidates() {
  //     Find all CMOV-group-candidates.
  //   }
  //
  //   checkForProfitableCmovCandidates() {
  //     * Calculate both loop-depth and optimized-loop-depth.
  //     * Use these depths to check for loop transformation profitability.
  //     * Check for CMOV-group-candidate transformation profitability.
  //   }
  //
  // For each profitable CMOV-group-candidate
  //   convertCmovInstsToBranches() {
  //     * Create FalseBB, SinkBB, Conditional branch to SinkBB.
  //     * Replace each CMOV instruction with a PHI instruction in SinkBB.
  //   }
  //
  // Note: For more details, see each function description.
  //===--------------------------------------------------------------------===//

  // Build up the loops in pre-order.
  SmallVector<MachineLoop *, 4> Loops(MLI->begin(), MLI->end());
  // Note that we need to check size on each iteration as we accumulate child
  // loops.
  for (int i = 0; i < (int)Loops.size(); ++i)
    for (MachineLoop *Child : Loops[i]->getSubLoops())
      Loops.push_back(Child);

  for (MachineLoop *CurrLoop : Loops) {
    // Optimize only innermost loops.
    if (!CurrLoop->getSubLoops().empty())
      continue;

    // List of consecutive CMOV instructions to be processed.
    CmovGroups CmovInstGroups;

    if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups))
      continue;

    if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(),
                                          CmovInstGroups))
      continue;

    Changed = true;
    for (auto &Group : CmovInstGroups)
      convertCmovInstsToBranches(Group);
  }

  return Changed;
}

bool X86CmovConverterPass::collectCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups,
    bool IncludeLoads) {
  //===--------------------------------------------------------------------===//
  // Collect all CMOV-group-candidates and add them into CmovInstGroups.
  //
  // CMOV-group:
  //   CMOV instructions in the same MBB that use the same EFLAGS def
  //   instruction.
  //
  // CMOV-group-candidate:
  //   CMOV-group where all the CMOV instructions are
  //     1. consecutive.
  //     2. have the same condition code or the opposite one.
  //     3. have only register operands (X86::CMOVrr).
  //===--------------------------------------------------------------------===//
  // List of possible improvements (TODOs):
  // --------------------------------------
  // TODO: Add support for X86::CMOVrm instructions.
  // TODO: Add support for X86::SETcc instructions.
  // TODO: Add support for CMOV-groups with non-consecutive CMOV instructions.
  //===--------------------------------------------------------------------===//
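  //
  // As an illustrative, hand-written example of a group candidate (not taken
  // from compiler output): both CMOVs below use the single EFLAGS def
  // produced by the CMP and have opposite condition codes (G and LE), so a
  // single conditional jump suffices for the whole group:
  //
  //   cmpq   %rsi, %rdi
  //   cmovgq %rdx, %rax
  //   cmovleq %rcx, %rbx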

  // Current processed CMOV-Group.
  CmovGroup Group;
  for (auto *MBB : Blocks) {
    Group.clear();
    // Condition code of the first CMOV instruction in the currently processed
    // range, and its opposite condition code.
    X86::CondCode FirstCC = X86::COND_INVALID, FirstOppCC = X86::COND_INVALID,
                  MemOpCC = X86::COND_INVALID;
    // Indicator of a non-CMOVrr instruction in the currently processed range.
    bool FoundNonCMOVInst = false;
    // Indicator for the currently processed CMOV-group if it should be
    // skipped.
    bool SkipGroup = false;

    for (auto &I : *MBB) {
      // Skip debug instructions.
      if (I.isDebugInstr())
        continue;
      X86::CondCode CC = X86::getCondFromCMov(I);
      // Check if we found a X86::CMOVrr instruction.
      if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) {
        if (Group.empty()) {
          // We found the first CMOV in the range; reset the flags.
          FirstCC = CC;
          FirstOppCC = X86::GetOppositeBranchCondition(CC);
          // Clear out the prior group's memory operand CC.
          MemOpCC = X86::COND_INVALID;
          FoundNonCMOVInst = false;
          SkipGroup = false;
        }
        Group.push_back(&I);
        // Check if it is a non-consecutive CMOV instruction or has a
        // different condition code than FirstCC or FirstOppCC.
        if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC))
          // Mark the SkipGroup indicator to skip the currently processed
          // CMOV-Group.
          SkipGroup = true;
        if (I.mayLoad()) {
          if (MemOpCC == X86::COND_INVALID)
            // The first memory operand CMOV.
            MemOpCC = CC;
          else if (CC != MemOpCC)
            // Can't handle mixed conditions with memory operands.
            SkipGroup = true;
        }
        // Check if we were relying on zero-extending behavior of the CMOV.
        if (!SkipGroup &&
            llvm::any_of(
                MRI->use_nodbg_instructions(I.defs().begin()->getReg()),
                [&](MachineInstr &UseI) {
                  return UseI.getOpcode() == X86::SUBREG_TO_REG;
                }))
          // FIXME: We should model the cost of using an explicit MOV to handle
          // the zero-extension rather than just refusing to handle this.
          SkipGroup = true;
        continue;
      }
      // If Group is empty, keep looking for the first CMOV in the range.
      if (Group.empty())
        continue;

      // We found a non X86::CMOVrr instruction.
      FoundNonCMOVInst = true;
      // Check if this instruction defines EFLAGS, to determine the end of the
      // processed range, as there would be no more instructions using the
      // current EFLAGS def.
      if (I.definesRegister(X86::EFLAGS)) {
        // Check if the currently processed CMOV-group should not be skipped
        // and add it as a CMOV-group-candidate.
        if (!SkipGroup)
          CmovInstGroups.push_back(Group);
        else
          ++NumOfSkippedCmovGroups;
        Group.clear();
      }
    }
    // The end of the basic block is considered the end of the range; check if
    // the currently processed CMOV-group should not be skipped and add it as
    // a CMOV-group-candidate.
    if (Group.empty())
      continue;
    if (!SkipGroup)
      CmovInstGroups.push_back(Group);
    else
      ++NumOfSkippedCmovGroups;
  }

  NumOfCmovGroupCandidate += CmovInstGroups.size();
  return !CmovInstGroups.empty();
}

/// \returns Depth of a CMOV instruction as if it were converted into a branch.
/// \param TrueOpDepth depth cost of the CMOV true-value operand.
/// \param FalseOpDepth depth cost of the CMOV false-value operand.
static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
  // The depth of the result after branch conversion is
  //   TrueOpDepth * TrueOpProbability + FalseOpDepth * FalseOpProbability.
  // As we have no info about branch weights, we assume 75% for one and 25% for
  // the other, and pick the result with the largest resulting depth.
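  // A worked example (operand depths chosen purely for illustration): for
  // TrueOpDepth = 8 and FalseOpDepth = 2, the two weightings below are
  // divideCeil(8 * 3 + 2, 4) = 7 and divideCeil(2 * 3 + 8, 4) = 4, so the
  // conservative estimate returned is 7.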
  return std::max(
      divideCeil(TrueOpDepth * 3 + FalseOpDepth, 4),
      divideCeil(FalseOpDepth * 3 + TrueOpDepth, 4));
}

bool X86CmovConverterPass::checkForProfitableCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) {
  struct DepthInfo {
    /// Depth of original loop.
    unsigned Depth;
    /// Depth of optimized loop.
    unsigned OptDepth;
  };
  /// Number of loop iterations for which to calculate instruction depth.
  static const unsigned LoopIterations = 2;
  DenseMap<MachineInstr *, DepthInfo> DepthMap;
  DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
  enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
  /// For each register type, maps the register to its last def instruction.
  DenseMap<unsigned, MachineInstr *> RegDefMaps[RegTypeNum];
  /// Maps a register operand to its def instruction, which can be nullptr if
  /// it is unknown (e.g., the operand is defined outside the loop).
  DenseMap<MachineOperand *, MachineInstr *> OperandToDefMap;

  // Set depth of unknown instruction (i.e., nullptr) to zero.
  DepthMap[nullptr] = {0, 0};

  SmallPtrSet<MachineInstr *, 4> CmovInstructions;
  for (auto &Group : CmovInstGroups)
    CmovInstructions.insert(Group.begin(), Group.end());

  //===--------------------------------------------------------------------===//
  // Step 1: Calculate instruction depth and loop depth.
  // Optimized-Loop:
  //   loop with CMOV-group-candidates converted into branches.
  //
  // Instruction-Depth:
  //   instruction latency + max operand depth.
  //   * For a CMOV instruction in the optimized loop the depth is calculated
  //     as:
  //       CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-Depth)
  // TODO: Find a better way to estimate the latency of the branch instruction
  //       rather than using the CMOV latency.
  //
  // Loop-Depth:
  //   max instruction depth of all instructions in the loop.
  //   Note: the instruction with max depth represents the critical path in
  //   the loop.
  //
  // Loop-Depth[i]:
  //   Loop-Depth calculated for the first `i` iterations.
  //   Note: it is enough to calculate depth for up to two iterations.
  //
  // Depth-Diff[i]:
  //   Number of cycles saved in the first `i` iterations by optimizing the
  //   loop.
  //===--------------------------------------------------------------------===//
  for (unsigned I = 0; I < LoopIterations; ++I) {
    DepthInfo &MaxDepth = LoopDepth[I];
    for (auto *MBB : Blocks) {
      // Clear physical registers Def map.
      RegDefMaps[PhyRegType].clear();
      for (MachineInstr &MI : *MBB) {
        // Skip debug instructions.
        if (MI.isDebugInstr())
          continue;
        unsigned MIDepth = 0;
        unsigned MIDepthOpt = 0;
        bool IsCMOV = CmovInstructions.count(&MI);
        for (auto &MO : MI.uses()) {
          // Check for "isUse()" as "uses()" also returns implicit definitions.
          if (!MO.isReg() || !MO.isUse())
            continue;
          Register Reg = MO.getReg();
          auto &RDM = RegDefMaps[Reg.isVirtual()];
          if (MachineInstr *DefMI = RDM.lookup(Reg)) {
            OperandToDefMap[&MO] = DefMI;
            DepthInfo Info = DepthMap.lookup(DefMI);
            MIDepth = std::max(MIDepth, Info.Depth);
            if (!IsCMOV)
              MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth);
          }
        }

        if (IsCMOV)
          MIDepthOpt = getDepthOfOptCmov(
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth,
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth);

        // Iterate over all operands to handle implicit definitions as well.
        for (auto &MO : MI.operands()) {
          if (!MO.isReg() || !MO.isDef())
            continue;
          Register Reg = MO.getReg();
          RegDefMaps[Reg.isVirtual()][Reg] = &MI;
        }

        unsigned Latency = TSchedModel.computeInstrLatency(&MI);
        DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency};
        MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth);
        MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt);
      }
    }
  }

  unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
                                   LoopDepth[1].Depth - LoopDepth[1].OptDepth};

  //===--------------------------------------------------------------------===//
  // Step 2: Check if the loop is worth optimizing.
  // Worth-Optimize-Loop:
  //   case 1: Diff[1] == Diff[0]
  //           The critical path is iteration independent - there is no
  //           dependency of critical-path instructions on critical-path
  //           instructions of the previous iteration.
  //           Thus, it is enough to check the gain percent of the 1st
  //           iteration - to be conservative, the optimized loop needs to
  //           have a depth of 12.5% fewer cycles than the original loop, per
  //           iteration.
  //
  //   case 2: Diff[1] > Diff[0]
  //           The critical path is iteration dependent - there is a
  //           dependency of critical-path instructions on critical-path
  //           instructions of the previous iteration.
  //           Thus, check the gain percent of the 2nd iteration (similar to
  //           the previous case), but it is also required to check the
  //           gradient of the gain - the change in Depth-Diff compared to
  //           the change in Loop-Depth between the 1st and 2nd iterations.
  //           To be conservative, the gradient needs to be at least 50%.
  //
  //   In addition, in order not to optimize loops with a very small gain, the
  //   gain (in cycles) after the 2nd iteration should not be less than a
  //   given threshold. Thus, the check (Diff[1] >= GainCycleThreshold) must
  //   apply.
  //
  // If the loop is not worth optimizing, remove all CMOV-group-candidates.
  //===--------------------------------------------------------------------===//
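  //
  // A worked example of case 1 (all numbers hypothetical): if the first
  // iteration has Depth = 24 and OptDepth = 19, and the second iteration
  // saves the same amount, then Diff[0] == Diff[1] == 5, and the loop passes
  // both checks below: 5 >= GainCycleThreshold (default 4) and
  // 5 * 8 = 40 >= 24, i.e. at least a 12.5% saving per iteration.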
  if (Diff[1] < GainCycleThreshold)
    return false;

  bool WorthOptLoop = false;
  if (Diff[1] == Diff[0])
    WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
  else if (Diff[1] > Diff[0])
    WorthOptLoop =
        (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) &&
        (Diff[1] * 8 >= LoopDepth[1].Depth);

  if (!WorthOptLoop)
    return false;

  ++NumOfLoopCandidate;

  //===--------------------------------------------------------------------===//
  // Step 3: Check for each CMOV-group-candidate whether it is worth
  //         optimizing.
  // Worth-Optimize-Group:
  //   iff it is worth optimizing all CMOV instructions in the group.
  //
  // Worth-Optimize-CMOV:
  //   A predicted branch is faster than CMOV by the difference between the
  //   depth of the condition operand and the depth of the taken (predicted)
  //   value operand.
  //   To be conservative, the gain of such a CMOV transformation should cover
  //   at least 25% of the branch-misprediction penalty.
  //===--------------------------------------------------------------------===//
  unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
  CmovGroups TempGroups;
  std::swap(TempGroups, CmovInstGroups);
  for (auto &Group : TempGroups) {
    bool WorthOpGroup = true;
    for (auto *MI : Group) {
      // Avoid CMOV instructions whose value is used as a pointer to load
      // from. This is another conservative check to avoid converting a CMOV
      // instruction used in a tree-search-like algorithm, where the branch is
      // unpredictable.
      auto UIs = MRI->use_instructions(MI->defs().begin()->getReg());
      if (!UIs.empty() && ++UIs.begin() == UIs.end()) {
        unsigned Op = UIs.begin()->getOpcode();
        if (Op == X86::MOV64rm || Op == X86::MOV32rm) {
          WorthOpGroup = false;
          break;
        }
      }

      unsigned CondCost =
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth;
      unsigned ValCost = getDepthOfOptCmov(
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
      if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
        WorthOpGroup = false;
        break;
      }
    }

    if (WorthOpGroup)
      CmovInstGroups.push_back(Group);
  }

  return !CmovInstGroups.empty();
}

static bool checkEFLAGSLive(MachineInstr *MI) {
  if (MI->killsRegister(X86::EFLAGS))
    return false;

  // The EFLAGS operand of MI might be missing a kill marker.
  // Figure out whether EFLAGS should be live after the MI instruction.
  MachineBasicBlock *BB = MI->getParent();
  MachineBasicBlock::iterator ItrMI = MI;

  // Scan forward through BB for a use/def of EFLAGS.
  for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) {
    if (I->readsRegister(X86::EFLAGS))
      return true;
    if (I->definesRegister(X86::EFLAGS))
      return false;
  }

  // We hit the end of the block; check whether EFLAGS is live into a
  // successor.
  for (auto I = BB->succ_begin(), E = BB->succ_end(); I != E; ++I) {
    if ((*I)->isLiveIn(X86::EFLAGS))
      return true;
  }

  return false;
}

/// Given \p First CMOV instruction and \p Last CMOV instruction representing a
/// group of CMOV instructions, which may contain debug instructions in
/// between, move all debug instructions to after the last CMOV instruction,
/// making the CMOV group consecutive.
static void packCmovGroup(MachineInstr *First, MachineInstr *Last) {
  assert(X86::getCondFromCMov(*Last) != X86::COND_INVALID &&
         "Last instruction in a CMOV group must be a CMOV instruction");

  SmallVector<MachineInstr *, 2> DBGInstructions;
  for (auto I = First->getIterator(), E = Last->getIterator(); I != E; I++) {
    if (I->isDebugInstr())
      DBGInstructions.push_back(&*I);
  }

  // Splice the debug instructions after the cmov group.
  MachineBasicBlock *MBB = First->getParent();
  for (auto *MI : DBGInstructions)
    MBB->insertAfter(Last, MI->removeFromParent());
}

void X86CmovConverterPass::convertCmovInstsToBranches(
    SmallVectorImpl<MachineInstr *> &Group) const {
  assert(!Group.empty() && "No CMOV instructions to convert");
  ++NumOfOptimizedCmovGroups;

  // If the CMOV group is not packed, e.g., there are debug instructions
  // between the first CMOV and the last CMOV, then pack the group and make
  // the CMOV instructions consecutive by moving the debug instructions to
  // after the last CMOV.
  packCmovGroup(Group.front(), Group.back());

  // To convert a CMOVcc instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.

  // Before
  // -----
  // MBB:
  //   cond = cmp ...
  //   v1 = CMOVge t1, f1, cond
  //   v2 = CMOVlt t2, f2, cond
  //   v3 = CMOVge v1, f3, cond
  //
  // After
  // -----
  // MBB:
  //   cond = cmp ...
  //   jge %SinkMBB
  //
  // FalseMBB:
  //   jmp %SinkMBB
  //
  // SinkMBB:
  //   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
  //   %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
  //                                          ; true-value with false-value
  //   %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use
  //                                          ; previous Phi instruction result

  MachineInstr &MI = *Group.front();
  MachineInstr *LastCMOV = Group.back();
  DebugLoc DL = MI.getDebugLoc();

  X86::CondCode CC = X86::CondCode(X86::getCondFromCMov(MI));
  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
  // Potentially swap the condition codes so that any memory operand to a CMOV
  // is in the *false* position instead of the *true* position. We can invert
  // any non-memory operand CMOV instructions to cope with this and we ensure
  // memory operand CMOVs are only included with a single condition code.
  if (llvm::any_of(Group, [&](MachineInstr *I) {
        return I->mayLoad() && X86::getCondFromCMov(*I) == CC;
      }))
    std::swap(CC, OppCC);

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction::iterator It = ++MBB->getIterator();
  MachineFunction *F = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();

  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB);
  F->insert(It, FalseMBB);
  F->insert(It, SinkMBB);

  // If the EFLAGS register isn't dead in the terminator, then claim that it's
  // live into the sink and copy blocks.
  if (checkEFLAGSLive(LastCMOV)) {
    FalseMBB->addLiveIn(X86::EFLAGS);
    SinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Transfer the remainder of BB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Add the false and sink blocks as its successors.
  MBB->addSuccessor(FalseMBB);
  MBB->addSuccessor(SinkMBB);

  // Create the conditional branch instruction.
  BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);

  // Add the sink block to the false block successors.
  FalseMBB->addSuccessor(SinkMBB);

  MachineInstrBuilder MIB;
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd =
      std::next(MachineBasicBlock::iterator(LastCMOV));
  MachineBasicBlock::iterator FalseInsertionPoint = FalseMBB->begin();
  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // First we need to insert an explicit load on the false path for any memory
  // operand. We also need to potentially do register rewriting here, but it is
  // simpler as the memory operands are always on the false path so we can
  // simply take that input, whatever it is.
  DenseMap<unsigned, unsigned> FalseBBRegRewriteTable;
  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;) {
    auto &MI = *MIIt++;
    // Skip any CMOVs in this group which don't load from memory.
    if (!MI.mayLoad()) {
      // Remember the false-side register input.
      Register FalseReg =
          MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg();
      // Walk back through any intermediate cmovs referenced.
      while (true) {
        auto FRIt = FalseBBRegRewriteTable.find(FalseReg);
        if (FRIt == FalseBBRegRewriteTable.end())
          break;
        FalseReg = FRIt->second;
      }
      FalseBBRegRewriteTable[MI.getOperand(0).getReg()] = FalseReg;
      continue;
    }

    // The condition must be the *opposite* of the one we've decided to branch
    // on as the branch will go *around* the load and the load should happen
    // when the CMOV condition is false.
    assert(X86::getCondFromCMov(MI) == OppCC &&
           "Can only handle memory-operand cmov instructions with a condition "
           "opposite to the selected branch direction.");

    // The goal is to rewrite the cmov from:
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), (mem)
    //
    // to
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), %C
    //   FalseMBB:
    //     %C = MOV (mem)
    //
    // Which will allow the next loop to rewrite the CMOV in terms of a PHI:
    //
    //   MBB:
    //     JMP!cc SinkMBB
    //   FalseMBB:
    //     %C = MOV (mem)
    //   SinkMBB:
    //     %A = PHI [ %C, FalseMBB ], [ %B, MBB]

    // Get a fresh register to use as the destination of the MOV.
    const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg());
    Register TmpReg = MRI->createVirtualRegister(RC);

    SmallVector<MachineInstr *, 4> NewMIs;
    bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg,
                                             /*UnfoldLoad*/ true,
                                             /*UnfoldStore*/ false, NewMIs);
    (void)Unfolded;
    assert(Unfolded && "Should never fail to unfold a loading cmov!");

    // Move the new CMOV to just before the old one and reset any impacted
    // iterator.
    auto *NewCMOV = NewMIs.pop_back_val();
    assert(X86::getCondFromCMov(*NewCMOV) == OppCC &&
           "Last new instruction isn't the expected CMOV!");
    LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump());
    MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV);
    if (&*MIItBegin == &MI)
      MIItBegin = MachineBasicBlock::iterator(NewCMOV);

    // Sink whatever instructions were needed to produce the unfolded operand
    // into the false block.
    for (auto *NewMI : NewMIs) {
      LLVM_DEBUG(dbgs() << "\tRewritten load instr: "; NewMI->dump());
      FalseMBB->insert(FalseInsertionPoint, NewMI);
      // Re-map any operands that are from other cmovs to the inputs for this
      // block.
      for (auto &MOp : NewMI->uses()) {
        if (!MOp.isReg())
          continue;
        auto It = FalseBBRegRewriteTable.find(MOp.getReg());
        if (It == FalseBBRegRewriteTable.end())
          continue;

        MOp.setReg(It->second);
        // This might have been a kill when it referenced the cmov result, but
        // it won't necessarily be once rewritten.
        // FIXME: We could potentially improve this by tracking whether the
        // operand to the cmov was also a kill, and then skipping the PHI node
        // construction below.
        MOp.setIsKill(false);
      }
    }
    MBB->erase(MachineBasicBlock::iterator(MI),
               std::next(MachineBasicBlock::iterator(MI)));

    // Add this PHI to the rewrite table.
    FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
  }

  // As we are creating the PHIs, we have to be careful if there is more than
  // one. Later CMOVs may reference the results of earlier CMOVs, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from earlier PHI's
  // destination registers, and the registers that went into the PHI.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    Register DestReg = MIIt->getOperand(0).getReg();
    Register Op1Reg = MIIt->getOperand(1).getReg();
    Register Op2Reg = MIIt->getOperand(2).getReg();

    // If this CMOV we are processing is the opposite condition from the jump
    // we generated, then we have to swap the operands for the PHI that is
    // going to be generated.
    if (X86::getCondFromCMov(*MIIt) == OppCC)
      std::swap(Op1Reg, Op2Reg);

    auto Op1Itr = RegRewriteTable.find(Op1Reg);
    if (Op1Itr != RegRewriteTable.end())
      Op1Reg = Op1Itr->second.first;

    auto Op2Itr = RegRewriteTable.find(Op2Reg);
    if (Op2Itr != RegRewriteTable.end())
      Op2Reg = Op2Itr->second.second;

    // SinkMBB:
    //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ]
    //   ...
    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(MBB);
    (void)MIB;
    LLVM_DEBUG(dbgs() << "\tFrom: "; MIIt->dump());
    LLVM_DEBUG(dbgs() << "\tTo: "; MIB->dump());

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
  }

  // Now remove the CMOV(s).
  MBB->erase(MIItBegin, MIItEnd);

  // Add new basic blocks to MachineLoopInfo.
  if (MachineLoop *L = MLI->getLoopFor(MBB)) {
    L->addBasicBlockToLoop(FalseMBB, MLI->getBase());
    L->addBasicBlockToLoop(SinkMBB, MLI->getBase());
  }
}

INITIALIZE_PASS_BEGIN(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",
                    false, false)

FunctionPass *llvm::createX86CmovConverterPass() {
  return new X86CmovConverterPass();
}
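
// A minimal sketch of how this pass is meant to be scheduled (assumed from
// the X86 target's pass setup; the exact hook and placement may vary across
// LLVM versions):
//
//   // In X86TargetMachine.cpp:
//   bool X86PassConfig::addILPOpts() {
//     // ...
//     addPass(createX86CmovConverterPass());
//     // ...
//   }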