//====- X86CmovConversion.cpp - Convert Cmov to Branch --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements a pass that converts X86 cmov instructions into
/// branches when profitable. This pass is conservative. It transforms if and
/// only if it can guarantee a gain with high confidence.
///
/// Thus, the optimization applies under the following conditions:
///   1. Consider as candidates only CMOVs in innermost loops (assume that
///      most hotspots are represented by these loops).
///   2. Given a group of CMOV instructions that are using the same EFLAGS def
///      instruction:
///      a. Consider them as candidates only if all have the same condition
///         code or the opposite one, to prevent generating more than one
///         conditional jump per EFLAGS def instruction.
///      b. Consider them as candidates only if all are profitable to be
///         converted (assume that one bad conversion may cause a degradation).
///   3. Apply conversion only for loops that are found profitable and only for
///      CMOV candidates that were found profitable.
///      a. A loop is considered profitable only if conversion will reduce its
///         depth cost by some threshold.
///      b. A CMOV is considered profitable if the cost of its condition is
///         higher than the average cost of its true-value and false-value by
///         25% of the branch-misprediction penalty. This assures no
///         degradation even with 25% branch misprediction.
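///
/// For example (an illustrative sketch in the same notation as the comment in
/// convertCmovInstsToBranches() below), a group satisfying (2a) may mix a
/// condition with its opposite:
///   v1 = CMOVge t1, f1, cond
///   v2 = CMOVlt t2, f2, cond   ; opposite of GE, one conditional jump still
///                              ; suffices
/// whereas a CMOVle on the same EFLAGS def would disqualify the group, since
/// handling it would need a second conditional jump.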
///
/// Note: This pass is assumed to run on SSA machine code.
//
//===----------------------------------------------------------------------===//
//
//  External interfaces:
//      FunctionPass *llvm::createX86CmovConverterPass();
//      bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF);
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "x86-cmov-conversion"

STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups");
STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");

// This internal switch can be used to turn off the cmov/branch optimization.
static cl::opt<bool>
    EnableCmovConverter("x86-cmov-converter",
                        cl::desc("Enable the X86 cmov-to-branch optimization."),
                        cl::init(true), cl::Hidden);

static cl::opt<unsigned>
    GainCycleThreshold("x86-cmov-converter-threshold",
                       cl::desc("Minimum gain per loop (in cycles) threshold."),
                       cl::init(4), cl::Hidden);

static cl::opt<bool> ForceMemOperand(
    "x86-cmov-converter-force-mem-operand",
    cl::desc("Convert cmovs to branches whenever they have memory operands."),
    cl::init(true), cl::Hidden);

static cl::opt<bool> ForceAll(
    "x86-cmov-converter-force-all",
    cl::desc("Convert all cmovs to branches."),
    cl::init(false), cl::Hidden);
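
// All of these switches are hidden. As an illustrative invocation (assuming a
// build of llc with the X86 backend), `llc -x86-cmov-converter=false foo.ll`
// should disable the transformation entirely, while
// `llc -x86-cmov-converter-force-all foo.ll` should force every convertible
// CMOV group into a branch regardless of profitability.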

namespace {

/// Converts X86 cmov instructions into branches when profitable.
class X86CmovConverterPass : public MachineFunctionPass {
public:
  X86CmovConverterPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 cmov Conversion"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  MachineRegisterInfo *MRI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineLoopInfo *MLI = nullptr;
  TargetSchedModel TSchedModel;

  /// List of consecutive CMOV instructions.
  using CmovGroup = SmallVector<MachineInstr *, 2>;
  using CmovGroups = SmallVector<CmovGroup, 2>;

  /// Collect all CMOV-group-candidates in \p CurrLoop and update \p
  /// CmovInstGroups accordingly.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop.
  /// \returns true iff it found any CMOV-group-candidate.
  bool collectCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                             CmovGroups &CmovInstGroups,
                             bool IncludeLoads = false);

  /// Check if it is profitable to transform each CMOV-group-candidate into a
  /// branch. Remove all groups that are not profitable from \p CmovInstGroups.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop.
  /// \returns true iff any CMOV-group-candidate remains.
  bool checkForProfitableCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                                        CmovGroups &CmovInstGroups);

  /// Convert the given list of consecutive CMOV instructions into a branch.
  ///
  /// \param Group Consecutive CMOV instructions to be converted into branch.
  void convertCmovInstsToBranches(SmallVectorImpl<MachineInstr *> &Group) const;
};

} // end anonymous namespace

char X86CmovConverterPass::ID = 0;

void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
  AU.addRequired<MachineLoopInfo>();
}

bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
  if (!EnableCmovConverter)
    return false;

  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << "**********\n");

  bool Changed = false;
  MLI = &getAnalysis<MachineLoopInfo>();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  MRI = &MF.getRegInfo();
  TII = STI.getInstrInfo();
  TRI = STI.getRegisterInfo();
  TSchedModel.init(&STI);

  // Before we handle the more subtle cases of register-register CMOVs inside
  // of potentially hot loops, we want to quickly remove all CMOVs (ForceAll)
  // or the ones with a memory operand (ForceMemOperand option). The latter
  // kind of CMOV risks a stall waiting for the load to complete, which
  // speculative execution behind a branch is better suited to handle on
  // modern x86 chips.
  if (ForceMemOperand || ForceAll) {
    CmovGroups AllCmovGroups;
    SmallVector<MachineBasicBlock *, 4> Blocks;
    for (auto &MBB : MF)
      Blocks.push_back(&MBB);
    if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads*/ true)) {
      for (auto &Group : AllCmovGroups) {
        // Skip any group that doesn't do at least one memory operand cmov.
        if (ForceMemOperand && !ForceAll &&
            llvm::none_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
          continue;

        // For CMOV groups which we can rewrite and which contain a memory
        // load, always rewrite them. On x86, a CMOV will dramatically amplify
        // any memory latency by blocking speculative execution.
        Changed = true;
        convertCmovInstsToBranches(Group);
      }
    }
    // Early return as ForceAll converts all CmovGroups.
    if (ForceAll)
      return Changed;
  }

  //===--------------------------------------------------------------------===//
  // Register-operand Conversion Algorithm
  // ---------
  //   For each innermost loop
  //     collectCmovCandidates() {
  //       Find all CMOV-group-candidates.
  //     }
  //
  //     checkForProfitableCmovCandidates() {
  //       * Calculate both loop-depth and optimized-loop-depth.
  //       * Use these depths to check for loop transformation profitability.
  //       * Check for CMOV-group-candidate transformation profitability.
  //     }
  //
  //     For each profitable CMOV-group-candidate
  //       convertCmovInstsToBranches() {
  //         * Create FalseBB, SinkBB, Conditional branch to SinkBB.
  //         * Replace each CMOV instruction with a PHI instruction in SinkBB.
  //       }
  //
  // Note: For more details, see each function description.
  //===--------------------------------------------------------------------===//

  // Build up the loops in pre-order.
  SmallVector<MachineLoop *, 4> Loops(MLI->begin(), MLI->end());
  // Note that we need to check size on each iteration as we accumulate child
  // loops.
  for (int i = 0; i < (int)Loops.size(); ++i)
    for (MachineLoop *Child : Loops[i]->getSubLoops())
      Loops.push_back(Child);

  for (MachineLoop *CurrLoop : Loops) {
    // Optimize only innermost loops.
    if (!CurrLoop->getSubLoops().empty())
      continue;

    // List of consecutive CMOV instructions to be processed.
    CmovGroups CmovInstGroups;

    if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups))
      continue;

    if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(),
                                          CmovInstGroups))
      continue;

    Changed = true;
    for (auto &Group : CmovInstGroups)
      convertCmovInstsToBranches(Group);
  }

  return Changed;
}

bool X86CmovConverterPass::collectCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups,
    bool IncludeLoads) {
  //===--------------------------------------------------------------------===//
  // Collect all CMOV-group-candidates and add them into CmovInstGroups.
  //
  // CMOV-group:
  //   CMOV instructions, in the same MBB, that use the same EFLAGS def
  //   instruction.
  //
  // CMOV-group-candidate:
  //   CMOV-group where all the CMOV instructions are
  //     1. consecutive.
  //     2. have the same condition code or the opposite one.
  //     3. have only register operands (X86::CMOVrr).
  //===--------------------------------------------------------------------===//
  // List of possible improvements (TODOs):
  // --------------------------------------
  // TODO: Add support for X86::CMOVrm instructions.
  // TODO: Add support for X86::SETcc instructions.
  // TODO: Add support for CMOV-groups with non-consecutive CMOV instructions.
  //===--------------------------------------------------------------------===//
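  // As an illustrative sketch (same notation as the comment in
  // convertCmovInstsToBranches() below), for a block containing:
  //   cond = cmp ...
  //   v1 = CMOVge t1, f1, cond
  //   v2 = CMOVlt t2, f2, cond
  //   cond2 = cmp ...            ; new EFLAGS def
  // the scan below collects {v1, v2} as one CMOV-group-candidate; the second
  // cmp both terminates the current range and starts a new one.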

  // Currently processed CMOV-Group.
  CmovGroup Group;
  for (auto *MBB : Blocks) {
    Group.clear();
    // Condition code of the first CMOV instruction in the currently processed
    // range and its opposite condition code.
    X86::CondCode FirstCC = X86::COND_INVALID, FirstOppCC = X86::COND_INVALID,
                  MemOpCC = X86::COND_INVALID;
    // Indicator of a non-CMOVrr instruction in the currently processed range.
    bool FoundNonCMOVInst = false;
    // Indicator for whether the currently processed CMOV-group should be
    // skipped.
    bool SkipGroup = false;

    for (auto &I : *MBB) {
      // Skip debug instructions.
      if (I.isDebugInstr())
        continue;
      X86::CondCode CC = X86::getCondFromCMov(I);
      // Check if we found an X86::CMOVrr instruction.
      if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) {
        if (Group.empty()) {
          // We found the first CMOV in the range; reset the flags.
          FirstCC = CC;
          FirstOppCC = X86::GetOppositeBranchCondition(CC);
          // Clear out the prior group's memory operand CC.
          MemOpCC = X86::COND_INVALID;
          FoundNonCMOVInst = false;
          SkipGroup = false;
        }
        Group.push_back(&I);
        // Check if it is a non-consecutive CMOV instruction or has a
        // different condition code than FirstCC or FirstOppCC.
        if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC))
          // Mark the SkipGroup indicator to skip the currently processed
          // CMOV-Group.
          SkipGroup = true;
        if (I.mayLoad()) {
          if (MemOpCC == X86::COND_INVALID)
            // The first memory operand CMOV.
            MemOpCC = CC;
          else if (CC != MemOpCC)
            // Can't handle mixed conditions with memory operands.
            SkipGroup = true;
        }
        // Check if we were relying on zero-extending behavior of the CMOV.
        if (!SkipGroup &&
            llvm::any_of(
                MRI->use_nodbg_instructions(I.defs().begin()->getReg()),
                [&](MachineInstr &UseI) {
                  return UseI.getOpcode() == X86::SUBREG_TO_REG;
                }))
          // FIXME: We should model the cost of using an explicit MOV to handle
          // the zero-extension rather than just refusing to handle this.
          SkipGroup = true;
        continue;
      }
      // If Group is empty, keep looking for the first CMOV in the range.
      if (Group.empty())
        continue;

      // We found a non-X86::CMOVrr instruction.
      FoundNonCMOVInst = true;
      // Check if this instruction defines EFLAGS, to determine the end of the
      // processed range, as there would be no more instructions using the
      // current EFLAGS def.
      if (I.definesRegister(X86::EFLAGS)) {
        // Check if the currently processed CMOV-group should not be skipped
        // and add it as a CMOV-group-candidate.
        if (!SkipGroup)
          CmovInstGroups.push_back(Group);
        else
          ++NumOfSkippedCmovGroups;
        Group.clear();
      }
    }
    // The end of the basic block is considered the end of the range; check if
    // the currently processed CMOV-group should not be skipped and add it as
    // a CMOV-group-candidate.
    if (Group.empty())
      continue;
    if (!SkipGroup)
      CmovInstGroups.push_back(Group);
    else
      ++NumOfSkippedCmovGroups;
  }

  NumOfCmovGroupCandidate += CmovInstGroups.size();
  return !CmovInstGroups.empty();
}

/// \returns Depth of CMOV instruction as if it was converted into branch.
/// \param TrueOpDepth depth cost of CMOV true-value operand.
/// \param FalseOpDepth depth cost of CMOV false-value operand.
static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
  // The depth of the result after branch conversion is
  // TrueOpDepth * TrueOpProbability + FalseOpDepth * FalseOpProbability.
  // As we have no info about branch weight, we assume 75% for one and 25% for
  // the other, and pick the result with the largest resulting depth.
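  // For instance (a worked sketch with made-up depths): TrueOpDepth = 8 and
  // FalseOpDepth = 4 yields max(ceil((8*3 + 4) / 4), ceil((4*3 + 8) / 4)) =
  // max(7, 5) = 7.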
  return std::max(
      divideCeil(TrueOpDepth * 3 + FalseOpDepth, 4),
      divideCeil(FalseOpDepth * 3 + TrueOpDepth, 4));
}

bool X86CmovConverterPass::checkForProfitableCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) {
  struct DepthInfo {
    /// Depth of original loop.
    unsigned Depth;
    /// Depth of optimized loop.
    unsigned OptDepth;
  };
  /// Number of loop iterations for which to calculate depth.
  static const unsigned LoopIterations = 2;
  DenseMap<MachineInstr *, DepthInfo> DepthMap;
  DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
  enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
  /// For each register type, maps the register to its last def instruction.
  DenseMap<unsigned, MachineInstr *> RegDefMaps[RegTypeNum];
  /// Maps a register operand to its def instruction, which can be nullptr if
  /// it is unknown (e.g., operand is defined outside the loop).
  DenseMap<MachineOperand *, MachineInstr *> OperandToDefMap;

  // Set depth of unknown instruction (i.e., nullptr) to zero.
  DepthMap[nullptr] = {0, 0};

  SmallPtrSet<MachineInstr *, 4> CmovInstructions;
  for (auto &Group : CmovInstGroups)
    CmovInstructions.insert(Group.begin(), Group.end());

  //===--------------------------------------------------------------------===//
  // Step 1: Calculate instruction depth and loop depth.
  // Optimized-Loop:
  //   loop with CMOV-group-candidates converted into branches.
  //
  // Instruction-Depth:
  //   instruction latency + max operand depth.
  //   * For a CMOV instruction in the optimized loop the depth is calculated
  //     as:
  //       CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-Depth)
  // TODO: Find a better way to estimate the latency of the branch instruction
  //       rather than using the CMOV latency.
  //
  // Loop-Depth:
  //   max instruction depth of all instructions in the loop.
  // Note: the instruction with max depth represents the critical-path in the
  //       loop.
  //
  // Loop-Depth[i]:
  //   Loop-Depth calculated for the first `i` iterations.
  // Note: it is enough to calculate depth for up to two iterations.
  //
  // Depth-Diff[i]:
  //   Number of cycles saved in the first `i` iterations by optimizing the
  //   loop.
  //===--------------------------------------------------------------------===//
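  // For example (illustrative values, not target data): for `%c = ADD %a, %b`
  // where ADD has latency 1 and the defs of %a and %b have depths 3 and 5,
  // the depth of %c is max(3, 5) + 1 = 6.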
  for (unsigned I = 0; I < LoopIterations; ++I) {
    DepthInfo &MaxDepth = LoopDepth[I];
    for (auto *MBB : Blocks) {
      // Clear physical registers Def map.
      RegDefMaps[PhyRegType].clear();
      for (MachineInstr &MI : *MBB) {
        // Skip debug instructions.
        if (MI.isDebugInstr())
          continue;
        unsigned MIDepth = 0;
        unsigned MIDepthOpt = 0;
        bool IsCMOV = CmovInstructions.count(&MI);
        for (auto &MO : MI.uses()) {
          // Check for "isUse()" as "uses()" also returns implicit definitions.
          if (!MO.isReg() || !MO.isUse())
            continue;
          Register Reg = MO.getReg();
          auto &RDM = RegDefMaps[Reg.isVirtual()];
          if (MachineInstr *DefMI = RDM.lookup(Reg)) {
            OperandToDefMap[&MO] = DefMI;
            DepthInfo Info = DepthMap.lookup(DefMI);
            MIDepth = std::max(MIDepth, Info.Depth);
            if (!IsCMOV)
              MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth);
          }
        }

        if (IsCMOV)
          MIDepthOpt = getDepthOfOptCmov(
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth,
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth);

        // Iterate over all operands to handle implicit definitions as well.
        for (auto &MO : MI.operands()) {
          if (!MO.isReg() || !MO.isDef())
            continue;
          Register Reg = MO.getReg();
          RegDefMaps[Reg.isVirtual()][Reg] = &MI;
        }

        unsigned Latency = TSchedModel.computeInstrLatency(&MI);
        DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency};
        MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth);
        MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt);
      }
    }
  }

  unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
                                   LoopDepth[1].Depth - LoopDepth[1].OptDepth};

  //===--------------------------------------------------------------------===//
  // Step 2: Check if the loop is worth optimizing.
  // Worth-Optimize-Loop:
  //   case 1: Diff[1] == Diff[0]
  //           The critical-path is iteration independent - there is no
  //           dependency of critical-path instructions on critical-path
  //           instructions of the previous iteration.
  //           Thus, it is enough to check the gain percent of the 1st
  //           iteration - to be conservative, the optimized loop needs to
  //           have a depth that is 12.5% fewer cycles than the original loop,
  //           per iteration.
  //
  //   case 2: Diff[1] > Diff[0]
  //           The critical-path is iteration dependent - there is a dependency
  //           of critical-path instructions on critical-path instructions of
  //           the previous iteration.
  //           Thus, check the gain percent of the 2nd iteration (similar to
  //           the previous case), but it is also required to check the
  //           gradient of the gain - the change in Depth-Diff compared to the
  //           change in Loop-Depth between the 1st and 2nd iterations.
  //           To be conservative, the gradient needs to be at least 50%.
  //
  // In addition, in order not to optimize loops with a very small gain, the
  // gain (in cycles) after the 2nd iteration should not be less than a given
  // threshold. Thus, the check (Diff[1] >= GainCycleThreshold) must apply.
  //
  // If the loop is not worth optimizing, remove all CMOV-group-candidates.
  //===--------------------------------------------------------------------===//
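  // As a worked sketch (numbers invented for illustration): suppose
  // LoopDepth[0] = {20, 14} and LoopDepth[1] = {36, 22}, so Diff = {6, 14}.
  // The threshold check passes (14 >= 4), case 2 applies (14 > 6), the
  // gradient check passes ((14 - 6) * 2 = 16 >= 36 - 20 = 16), and the gain
  // percent check passes (14 * 8 = 112 >= 36), so the loop is considered
  // worth optimizing.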
  if (Diff[1] < GainCycleThreshold)
    return false;

  bool WorthOptLoop = false;
  if (Diff[1] == Diff[0])
    WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
  else if (Diff[1] > Diff[0])
    WorthOptLoop =
        (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) &&
        (Diff[1] * 8 >= LoopDepth[1].Depth);

  if (!WorthOptLoop)
    return false;

  ++NumOfLoopCandidate;

  //===--------------------------------------------------------------------===//
  // Step 3: Check for each CMOV-group-candidate if it is worth optimizing.
  // Worth-Optimize-Group:
  //   Iff it is worth optimizing all CMOV instructions in the group.
  //
  // Worth-Optimize-CMOV:
  //   A predicted branch is faster than a CMOV by the difference between the
  //   depth of the condition operand and the depth of the taken (predicted)
  //   value operand.
  //   To be conservative, the gain of such a CMOV transformation should cover
  //   at least 25% of the branch-misprediction penalty.
  //===--------------------------------------------------------------------===//
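  // For instance (illustrative numbers only): with MispredictPenalty = 20, a
  // CMOV with CondCost = 10 and ValCost = 4 is kept, since
  // (10 - 4) * 4 = 24 >= 20; with CondCost = 8 the group would be dropped,
  // since (8 - 4) * 4 = 16 < 20.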
  unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
  CmovGroups TempGroups;
  std::swap(TempGroups, CmovInstGroups);
  for (auto &Group : TempGroups) {
    bool WorthOpGroup = true;
    for (auto *MI : Group) {
      // Avoid CMOV instructions whose value is used as a pointer to load
      // from. This is another conservative check to avoid converting a CMOV
      // instruction used with a tree-search-like algorithm, where the branch
      // is unpredictable.
      auto UIs = MRI->use_instructions(MI->defs().begin()->getReg());
      if (!UIs.empty() && ++UIs.begin() == UIs.end()) {
        unsigned Op = UIs.begin()->getOpcode();
        if (Op == X86::MOV64rm || Op == X86::MOV32rm) {
          WorthOpGroup = false;
          break;
        }
      }

      unsigned CondCost =
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth;
      unsigned ValCost = getDepthOfOptCmov(
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
      if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
        WorthOpGroup = false;
        break;
      }
    }

    if (WorthOpGroup)
      CmovInstGroups.push_back(Group);
  }

  return !CmovInstGroups.empty();
}

static bool checkEFLAGSLive(MachineInstr *MI) {
  if (MI->killsRegister(X86::EFLAGS))
    return false;

  // The EFLAGS operand of MI might be missing a kill marker.
  // Figure out whether the EFLAGS operand should be live after MI.
  MachineBasicBlock *BB = MI->getParent();
  MachineBasicBlock::iterator ItrMI = MI;

  // Scan forward through BB for a use/def of EFLAGS.
  for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) {
    if (I->readsRegister(X86::EFLAGS))
      return true;
    if (I->definesRegister(X86::EFLAGS))
      return false;
  }

  // We hit the end of the block, check whether EFLAGS is live into a successor.
  for (MachineBasicBlock *Succ : BB->successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}

/// Given \p First CMOV instruction and \p Last CMOV instruction representing a
/// group of CMOV instructions, which may contain debug instructions in between,
/// move all debug instructions to after the last CMOV instruction, making the
/// CMOV group consecutive.
static void packCmovGroup(MachineInstr *First, MachineInstr *Last) {
  assert(X86::getCondFromCMov(*Last) != X86::COND_INVALID &&
         "Last instruction in a CMOV group must be a CMOV instruction");

  SmallVector<MachineInstr *, 2> DBGInstructions;
  for (auto I = First->getIterator(), E = Last->getIterator(); I != E; I++) {
    if (I->isDebugInstr())
      DBGInstructions.push_back(&*I);
  }

  // Splice the debug instructions after the cmov group.
  MachineBasicBlock *MBB = First->getParent();
  for (auto *MI : DBGInstructions)
    MBB->insertAfter(Last, MI->removeFromParent());
}

void X86CmovConverterPass::convertCmovInstsToBranches(
    SmallVectorImpl<MachineInstr *> &Group) const {
  assert(!Group.empty() && "No CMOV instructions to convert");
  ++NumOfOptimizedCmovGroups;

  // If the CMOV group is not packed, e.g., there are debug instructions
  // between the first CMOV and the last CMOV, then pack the group and make
  // the CMOV instructions consecutive by moving the debug instructions to
  // after the last CMOV.
  packCmovGroup(Group.front(), Group.back());

  // To convert a CMOVcc instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.

  // Before
  // -----
  // MBB:
  //   cond = cmp ...
  //   v1 = CMOVge t1, f1, cond
  //   v2 = CMOVlt t2, f2, cond
  //   v3 = CMOVge v1, f3, cond
  //
  // After
  // -----
  // MBB:
  //   cond = cmp ...
  //   jge %SinkMBB
  //
  // FalseMBB:
  //   jmp %SinkMBB
  //
  // SinkMBB:
  //   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
  //   %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
  //                                          ; true-value with false-value
  //   %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use
  //                                          ; previous Phi instruction result

  MachineInstr &MI = *Group.front();
  MachineInstr *LastCMOV = Group.back();
  DebugLoc DL = MI.getDebugLoc();

  X86::CondCode CC = X86::CondCode(X86::getCondFromCMov(MI));
  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
  // Potentially swap the condition codes so that any memory operand to a CMOV
  // is in the *false* position instead of the *true* position. We can invert
  // any non-memory operand CMOV instructions to cope with this and we ensure
  // memory operand CMOVs are only included with a single condition code.
  if (llvm::any_of(Group, [&](MachineInstr *I) {
        return I->mayLoad() && X86::getCondFromCMov(*I) == CC;
      }))
    std::swap(CC, OppCC);

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction::iterator It = ++MBB->getIterator();
  MachineFunction *F = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();

  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB);
  F->insert(It, FalseMBB);
  F->insert(It, SinkMBB);

  // If the EFLAGS register isn't dead in the terminator, then claim that it's
  // live into the sink and copy blocks.
  if (checkEFLAGSLive(LastCMOV)) {
    FalseMBB->addLiveIn(X86::EFLAGS);
    SinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Transfer the remainder of MBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Add the false and sink blocks as its successors.
  MBB->addSuccessor(FalseMBB);
  MBB->addSuccessor(SinkMBB);

  // Create the conditional branch instruction.
  BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);

  // Add the sink block to the false block successors.
  FalseMBB->addSuccessor(SinkMBB);

  MachineInstrBuilder MIB;
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd =
      std::next(MachineBasicBlock::iterator(LastCMOV));
  MachineBasicBlock::iterator FalseInsertionPoint = FalseMBB->begin();
  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // First we need to insert an explicit load on the false path for any memory
  // operand. We also need to potentially do register rewriting here, but it is
  // simpler as the memory operands are always on the false path so we can
  // simply take that input, whatever it is.
  DenseMap<unsigned, unsigned> FalseBBRegRewriteTable;
  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;) {
    auto &MI = *MIIt++;
    // Skip any CMOVs in this group which don't load from memory.
    if (!MI.mayLoad()) {
      // Remember the false-side register input.
      Register FalseReg =
          MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg();
      // Walk back through any intermediate cmovs referenced.
      while (true) {
        auto FRIt = FalseBBRegRewriteTable.find(FalseReg);
        if (FRIt == FalseBBRegRewriteTable.end())
          break;
        FalseReg = FRIt->second;
      }
      FalseBBRegRewriteTable[MI.getOperand(0).getReg()] = FalseReg;
      continue;
    }

    // The condition must be the *opposite* of the one we've decided to branch
    // on as the branch will go *around* the load and the load should happen
    // when the CMOV condition is false.
    assert(X86::getCondFromCMov(MI) == OppCC &&
           "Can only handle memory-operand cmov instructions with a condition "
           "opposite to the selected branch direction.");

    // The goal is to rewrite the cmov from:
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), (mem)
    //
    // to
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), %C
    //   FalseMBB:
    //     %C = MOV (mem)
    //
    // which will allow the next loop to rewrite the CMOV in terms of a PHI:
    //
    //   MBB:
    //     JMP!cc SinkMBB
    //   FalseMBB:
    //     %C = MOV (mem)
    //   SinkMBB:
    //     %A = PHI [ %C, FalseMBB ], [ %B, MBB]

    // Get a fresh register to use as the destination of the MOV.
    const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg());
    Register TmpReg = MRI->createVirtualRegister(RC);

    SmallVector<MachineInstr *, 4> NewMIs;
    bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg,
                                             /*UnfoldLoad*/ true,
                                             /*UnfoldStore*/ false, NewMIs);
    (void)Unfolded;
    assert(Unfolded && "Should never fail to unfold a loading cmov!");

    // Move the new CMOV to just before the old one and reset any impacted
    // iterator.
    auto *NewCMOV = NewMIs.pop_back_val();
    assert(X86::getCondFromCMov(*NewCMOV) == OppCC &&
           "Last new instruction isn't the expected CMOV!");
    LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump());
    MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV);
    if (&*MIItBegin == &MI)
      MIItBegin = MachineBasicBlock::iterator(NewCMOV);

    // Sink whatever instructions were needed to produce the unfolded operand
    // into the false block.
    for (auto *NewMI : NewMIs) {
      LLVM_DEBUG(dbgs() << "\tRewritten load instr: "; NewMI->dump());
      FalseMBB->insert(FalseInsertionPoint, NewMI);
      // Re-map any operands that are from other cmovs to the inputs for this
      // block.
      for (auto &MOp : NewMI->uses()) {
        if (!MOp.isReg())
          continue;
        auto It = FalseBBRegRewriteTable.find(MOp.getReg());
        if (It == FalseBBRegRewriteTable.end())
          continue;

        MOp.setReg(It->second);
        // This might have been a kill when it referenced the cmov result, but
        // it won't necessarily be once rewritten.
        // FIXME: We could potentially improve this by tracking whether the
        // operand to the cmov was also a kill, and then skipping the PHI node
        // construction below.
        MOp.setIsKill(false);
      }
    }
    MBB->erase(&MI);

    // Record this rewrite so later CMOVs in the group pick up the unfolded
    // load's result.
    FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
  }

  // As we are creating the PHIs, we have to be careful if there is more than
  // one. Later CMOVs may reference the results of earlier CMOVs, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from earlier PHIs'
  // destination registers, and the registers that went into the PHI.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    Register DestReg = MIIt->getOperand(0).getReg();
    Register Op1Reg = MIIt->getOperand(1).getReg();
    Register Op2Reg = MIIt->getOperand(2).getReg();

    // If the CMOV we are processing is the opposite condition from the jump
    // we generated, then we have to swap the operands for the PHI that is
    // going to be generated.
    if (X86::getCondFromCMov(*MIIt) == OppCC)
      std::swap(Op1Reg, Op2Reg);

    auto Op1Itr = RegRewriteTable.find(Op1Reg);
    if (Op1Itr != RegRewriteTable.end())
      Op1Reg = Op1Itr->second.first;

    auto Op2Itr = RegRewriteTable.find(Op2Reg);
    if (Op2Itr != RegRewriteTable.end())
      Op2Reg = Op2Itr->second.second;

    // SinkMBB:
    //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ]
    // ...
    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(MBB);
    (void)MIB;
    LLVM_DEBUG(dbgs() << "\tFrom: "; MIIt->dump());
    LLVM_DEBUG(dbgs() << "\tTo: "; MIB->dump());

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
  }

  // Now remove the CMOV(s).
  MBB->erase(MIItBegin, MIItEnd);

  // Add the new basic blocks to MachineLoopInfo.
  if (MachineLoop *L = MLI->getLoopFor(MBB)) {
    L->addBasicBlockToLoop(FalseMBB, MLI->getBase());
    L->addBasicBlockToLoop(SinkMBB, MLI->getBase());
  }
}

INITIALIZE_PASS_BEGIN(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",
                    false, false)

FunctionPass *llvm::createX86CmovConverterPass() {
  return new X86CmovConverterPass();
}