File: | lib/Target/X86/X86CmovConversion.cpp |
Warning: | line 308, column 37 The right operand of '!=' is a garbage value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //====- X86CmovConversion.cpp - Convert Cmov to Branch --------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | /// \file | |||
10 | /// This file implements a pass that converts X86 cmov instructions into | |||
11 | /// branches when profitable. This pass is conservative. It transforms if and | |||
12 | /// only if it can guarantee a gain with high confidence. | |||
13 | /// | |||
14 | /// Thus, the optimization applies under the following conditions: | |||
15 | /// 1. Consider as candidates only CMOVs in innermost loops (assume that | |||
16 | /// most hotspots are represented by these loops). | |||
17 | /// 2. Given a group of CMOV instructions that are using the same EFLAGS def | |||
18 | /// instruction: | |||
19 | /// a. Consider them as candidates only if all have the same code condition | |||
20 | /// or the opposite one to prevent generating more than one conditional | |||
21 | /// jump per EFLAGS def instruction. | |||
22 | /// b. Consider them as candidates only if all are profitable to be | |||
23 | /// converted (assume that one bad conversion may cause a degradation). | |||
24 | /// 3. Apply conversion only for loops that are found profitable and only for | |||
25 | /// CMOV candidates that were found profitable. | |||
26 | /// a. A loop is considered profitable only if conversion will reduce its | |||
27 | /// depth cost by some threshold. | |||
28 | /// b. CMOV is considered profitable if the cost of its condition is higher | |||
29 | /// than the average cost of its true-value and false-value by 25% of | |||
30 | /// branch-misprediction-penalty. This assures no degradation even with | |||
31 | /// 25% branch misprediction. | |||
32 | /// | |||
33 | /// Note: This pass is assumed to run on SSA machine code. | |||
34 | // | |||
35 | //===----------------------------------------------------------------------===// | |||
36 | // | |||
37 | // External interfaces: | |||
38 | // FunctionPass *llvm::createX86CmovConverterPass(); | |||
39 | // bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF); | |||
40 | // | |||
41 | //===----------------------------------------------------------------------===// | |||
42 | ||||
43 | #include "X86.h" | |||
44 | #include "X86InstrInfo.h" | |||
45 | #include "llvm/ADT/ArrayRef.h" | |||
46 | #include "llvm/ADT/DenseMap.h" | |||
47 | #include "llvm/ADT/STLExtras.h" | |||
48 | #include "llvm/ADT/SmallPtrSet.h" | |||
49 | #include "llvm/ADT/SmallVector.h" | |||
50 | #include "llvm/ADT/Statistic.h" | |||
51 | #include "llvm/CodeGen/MachineBasicBlock.h" | |||
52 | #include "llvm/CodeGen/MachineFunction.h" | |||
53 | #include "llvm/CodeGen/MachineFunctionPass.h" | |||
54 | #include "llvm/CodeGen/MachineInstr.h" | |||
55 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
56 | #include "llvm/CodeGen/MachineLoopInfo.h" | |||
57 | #include "llvm/CodeGen/MachineOperand.h" | |||
58 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
59 | #include "llvm/CodeGen/TargetInstrInfo.h" | |||
60 | #include "llvm/CodeGen/TargetRegisterInfo.h" | |||
61 | #include "llvm/CodeGen/TargetSchedule.h" | |||
62 | #include "llvm/CodeGen/TargetSubtargetInfo.h" | |||
63 | #include "llvm/IR/DebugLoc.h" | |||
64 | #include "llvm/MC/MCSchedule.h" | |||
65 | #include "llvm/Pass.h" | |||
66 | #include "llvm/Support/CommandLine.h" | |||
67 | #include "llvm/Support/Debug.h" | |||
68 | #include "llvm/Support/raw_ostream.h" | |||
69 | #include <algorithm> | |||
70 | #include <cassert> | |||
71 | #include <iterator> | |||
72 | #include <utility> | |||
73 | ||||
74 | using namespace llvm; | |||
75 | ||||
76 | #define DEBUG_TYPE"x86-cmov-conversion" "x86-cmov-conversion" | |||
77 | ||||
78 | STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups")static llvm::Statistic NumOfSkippedCmovGroups = {"x86-cmov-conversion" , "NumOfSkippedCmovGroups", "Number of unsupported CMOV-groups" , {0}, {false}}; | |||
79 | STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates")static llvm::Statistic NumOfCmovGroupCandidate = {"x86-cmov-conversion" , "NumOfCmovGroupCandidate", "Number of CMOV-group candidates" , {0}, {false}}; | |||
80 | STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops")static llvm::Statistic NumOfLoopCandidate = {"x86-cmov-conversion" , "NumOfLoopCandidate", "Number of CMOV-conversion profitable loops" , {0}, {false}}; | |||
81 | STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups")static llvm::Statistic NumOfOptimizedCmovGroups = {"x86-cmov-conversion" , "NumOfOptimizedCmovGroups", "Number of optimized CMOV-groups" , {0}, {false}}; | |||
82 | ||||
83 | // This internal switch can be used to turn off the cmov/branch optimization. | |||
84 | static cl::opt<bool> | |||
85 | EnableCmovConverter("x86-cmov-converter", | |||
86 | cl::desc("Enable the X86 cmov-to-branch optimization."), | |||
87 | cl::init(true), cl::Hidden); | |||
88 | ||||
89 | static cl::opt<unsigned> | |||
90 | GainCycleThreshold("x86-cmov-converter-threshold", | |||
91 | cl::desc("Minimum gain per loop (in cycles) threshold."), | |||
92 | cl::init(4), cl::Hidden); | |||
93 | ||||
94 | static cl::opt<bool> ForceMemOperand( | |||
95 | "x86-cmov-converter-force-mem-operand", | |||
96 | cl::desc("Convert cmovs to branches whenever they have memory operands."), | |||
97 | cl::init(true), cl::Hidden); | |||
98 | ||||
99 | namespace { | |||
100 | ||||
101 | /// Converts X86 cmov instructions into branches when profitable. | |||
102 | class X86CmovConverterPass : public MachineFunctionPass { | |||
103 | public: | |||
104 | X86CmovConverterPass() : MachineFunctionPass(ID) { | |||
105 | initializeX86CmovConverterPassPass(*PassRegistry::getPassRegistry()); | |||
106 | } | |||
107 | ||||
108 | StringRef getPassName() const override { return "X86 cmov Conversion"; } | |||
109 | bool runOnMachineFunction(MachineFunction &MF) override; | |||
110 | void getAnalysisUsage(AnalysisUsage &AU) const override; | |||
111 | ||||
112 | /// Pass identification, replacement for typeid. | |||
113 | static char ID; | |||
114 | ||||
115 | private: | |||
116 | MachineRegisterInfo *MRI; | |||
117 | const TargetInstrInfo *TII; | |||
118 | const TargetRegisterInfo *TRI; | |||
119 | TargetSchedModel TSchedModel; | |||
120 | ||||
121 | /// List of consecutive CMOV instructions. | |||
122 | using CmovGroup = SmallVector<MachineInstr *, 2>; | |||
123 | using CmovGroups = SmallVector<CmovGroup, 2>; | |||
124 | ||||
125 | /// Collect all CMOV-group-candidates in \p CurrLoop and update \p | |||
126 | /// CmovInstGroups accordingly. | |||
127 | /// | |||
128 | /// \param Blocks List of blocks to process. | |||
129 | /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop. | |||
130 | /// \returns true iff it found any CMOV-group-candidate. | |||
131 | bool collectCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks, | |||
132 | CmovGroups &CmovInstGroups, | |||
133 | bool IncludeLoads = false); | |||
134 | ||||
135 | /// Check if it is profitable to transform each CMOV-group-candidates into | |||
136 | /// branch. Remove all groups that are not profitable from \p CmovInstGroups. | |||
137 | /// | |||
138 | /// \param Blocks List of blocks to process. | |||
139 | /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop. | |||
140 | /// \returns true iff any CMOV-group-candidate remain. | |||
141 | bool checkForProfitableCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks, | |||
142 | CmovGroups &CmovInstGroups); | |||
143 | ||||
144 | /// Convert the given list of consecutive CMOV instructions into a branch. | |||
145 | /// | |||
146 | /// \param Group Consecutive CMOV instructions to be converted into branch. | |||
147 | void convertCmovInstsToBranches(SmallVectorImpl<MachineInstr *> &Group) const; | |||
148 | }; | |||
149 | ||||
150 | } // end anonymous namespace | |||
151 | ||||
152 | char X86CmovConverterPass::ID = 0; | |||
153 | ||||
154 | void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const { | |||
155 | MachineFunctionPass::getAnalysisUsage(AU); | |||
156 | AU.addRequired<MachineLoopInfo>(); | |||
157 | } | |||
158 | ||||
159 | bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) { | |||
160 | if (skipFunction(MF.getFunction())) | |||
| ||||
161 | return false; | |||
162 | if (!EnableCmovConverter) | |||
163 | return false; | |||
164 | ||||
165 | LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-cmov-conversion")) { dbgs() << "********** " << getPassName() << " : " << MF.getName() << "**********\n" ; } } while (false) | |||
166 | << "**********\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-cmov-conversion")) { dbgs() << "********** " << getPassName() << " : " << MF.getName() << "**********\n" ; } } while (false); | |||
167 | ||||
168 | bool Changed = false; | |||
169 | MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); | |||
170 | const TargetSubtargetInfo &STI = MF.getSubtarget(); | |||
171 | MRI = &MF.getRegInfo(); | |||
172 | TII = STI.getInstrInfo(); | |||
173 | TRI = STI.getRegisterInfo(); | |||
174 | TSchedModel.init(&STI); | |||
175 | ||||
176 | // Before we handle the more subtle cases of register-register CMOVs inside | |||
177 | // of potentially hot loops, we want to quickly remove all CMOVs with | |||
178 | // a memory operand. The CMOV will risk a stall waiting for the load to | |||
179 | // complete that speculative execution behind a branch is better suited to | |||
180 | // handle on modern x86 chips. | |||
181 | if (ForceMemOperand) { | |||
182 | CmovGroups AllCmovGroups; | |||
183 | SmallVector<MachineBasicBlock *, 4> Blocks; | |||
184 | for (auto &MBB : MF) | |||
185 | Blocks.push_back(&MBB); | |||
186 | if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads*/ true)) { | |||
187 | for (auto &Group : AllCmovGroups) { | |||
188 | // Skip any group that doesn't do at least one memory operand cmov. | |||
189 | if (!llvm::any_of(Group, [&](MachineInstr *I) { return I->mayLoad(); })) | |||
190 | continue; | |||
191 | ||||
192 | // For CMOV groups which we can rewrite and which contain a memory load, | |||
193 | // always rewrite them. On x86, a CMOV will dramatically amplify any | |||
194 | // memory latency by blocking speculative execution. | |||
195 | Changed = true; | |||
196 | convertCmovInstsToBranches(Group); | |||
197 | } | |||
198 | } | |||
199 | } | |||
200 | ||||
201 | //===--------------------------------------------------------------------===// | |||
202 | // Register-operand Conversion Algorithm | |||
203 | // --------- | |||
204 | // For each inner most loop | |||
205 | // collectCmovCandidates() { | |||
206 | // Find all CMOV-group-candidates. | |||
207 | // } | |||
208 | // | |||
209 | // checkForProfitableCmovCandidates() { | |||
210 | // * Calculate both loop-depth and optimized-loop-depth. | |||
211 | // * Use these depth to check for loop transformation profitability. | |||
212 | // * Check for CMOV-group-candidate transformation profitability. | |||
213 | // } | |||
214 | // | |||
215 | // For each profitable CMOV-group-candidate | |||
216 | // convertCmovInstsToBranches() { | |||
217 | // * Create FalseBB, SinkBB, Conditional branch to SinkBB. | |||
218 | // * Replace each CMOV instruction with a PHI instruction in SinkBB. | |||
219 | // } | |||
220 | // | |||
221 | // Note: For more details, see each function description. | |||
222 | //===--------------------------------------------------------------------===// | |||
223 | ||||
224 | // Build up the loops in pre-order. | |||
225 | SmallVector<MachineLoop *, 4> Loops(MLI.begin(), MLI.end()); | |||
226 | // Note that we need to check size on each iteration as we accumulate child | |||
227 | // loops. | |||
228 | for (int i = 0; i < (int)Loops.size(); ++i) | |||
229 | for (MachineLoop *Child : Loops[i]->getSubLoops()) | |||
230 | Loops.push_back(Child); | |||
231 | ||||
232 | for (MachineLoop *CurrLoop : Loops) { | |||
233 | // Optimize only inner most loops. | |||
234 | if (!CurrLoop->getSubLoops().empty()) | |||
235 | continue; | |||
236 | ||||
237 | // List of consecutive CMOV instructions to be processed. | |||
238 | CmovGroups CmovInstGroups; | |||
239 | ||||
240 | if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups)) | |||
241 | continue; | |||
242 | ||||
243 | if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(), | |||
244 | CmovInstGroups)) | |||
245 | continue; | |||
246 | ||||
247 | Changed = true; | |||
248 | for (auto &Group : CmovInstGroups) | |||
249 | convertCmovInstsToBranches(Group); | |||
250 | } | |||
251 | ||||
252 | return Changed; | |||
253 | } | |||
254 | ||||
255 | bool X86CmovConverterPass::collectCmovCandidates( | |||
256 | ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups, | |||
257 | bool IncludeLoads) { | |||
258 | //===--------------------------------------------------------------------===// | |||
259 | // Collect all CMOV-group-candidates and add them into CmovInstGroups. | |||
260 | // | |||
261 | // CMOV-group: | |||
262 | // CMOV instructions, in same MBB, that uses same EFLAGS def instruction. | |||
263 | // | |||
264 | // CMOV-group-candidate: | |||
265 | // CMOV-group where all the CMOV instructions are | |||
266 | // 1. consecutive. | |||
267 | // 2. have same condition code or opposite one. | |||
268 | // 3. have only operand registers (X86::CMOVrr). | |||
269 | //===--------------------------------------------------------------------===// | |||
270 | // List of possible improvement (TODO's): | |||
271 | // -------------------------------------- | |||
272 | // TODO: Add support for X86::CMOVrm instructions. | |||
273 | // TODO: Add support for X86::SETcc instructions. | |||
274 | // TODO: Add support for CMOV-groups with non consecutive CMOV instructions. | |||
275 | //===--------------------------------------------------------------------===// | |||
276 | ||||
277 | // Current processed CMOV-Group. | |||
278 | CmovGroup Group; | |||
279 | for (auto *MBB : Blocks) { | |||
280 | Group.clear(); | |||
281 | // Condition code of first CMOV instruction current processed range and its | |||
282 | // opposite condition code. | |||
283 | X86::CondCode FirstCC, FirstOppCC, MemOpCC; | |||
284 | // Indicator of a non CMOVrr instruction in the current processed range. | |||
285 | bool FoundNonCMOVInst = false; | |||
286 | // Indicator for current processed CMOV-group if it should be skipped. | |||
287 | bool SkipGroup = false; | |||
288 | ||||
289 | for (auto &I : *MBB) { | |||
290 | // Skip debug instructions. | |||
291 | if (I.isDebugInstr()) | |||
292 | continue; | |||
293 | X86::CondCode CC = X86::getCondFromCMov(I); | |||
294 | // Check if we found a X86::CMOVrr instruction. | |||
295 | if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) { | |||
296 | if (Group.empty()) { | |||
297 | // We found first CMOV in the range, reset flags. | |||
298 | FirstCC = CC; | |||
299 | FirstOppCC = X86::GetOppositeBranchCondition(CC); | |||
300 | // Clear out the prior group's memory operand CC. | |||
301 | MemOpCC = X86::COND_INVALID; | |||
302 | FoundNonCMOVInst = false; | |||
303 | SkipGroup = false; | |||
304 | } | |||
305 | Group.push_back(&I); | |||
306 | // Check if it is a non-consecutive CMOV instruction or it has different | |||
307 | // condition code than FirstCC or FirstOppCC. | |||
308 | if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC)) | |||
| ||||
309 | // Mark the SKipGroup indicator to skip current processed CMOV-Group. | |||
310 | SkipGroup = true; | |||
311 | if (I.mayLoad()) { | |||
312 | if (MemOpCC == X86::COND_INVALID) | |||
313 | // The first memory operand CMOV. | |||
314 | MemOpCC = CC; | |||
315 | else if (CC != MemOpCC) | |||
316 | // Can't handle mixed conditions with memory operands. | |||
317 | SkipGroup = true; | |||
318 | } | |||
319 | // Check if we were relying on zero-extending behavior of the CMOV. | |||
320 | if (!SkipGroup && | |||
321 | llvm::any_of( | |||
322 | MRI->use_nodbg_instructions(I.defs().begin()->getReg()), | |||
323 | [&](MachineInstr &UseI) { | |||
324 | return UseI.getOpcode() == X86::SUBREG_TO_REG; | |||
325 | })) | |||
326 | // FIXME: We should model the cost of using an explicit MOV to handle | |||
327 | // the zero-extension rather than just refusing to handle this. | |||
328 | SkipGroup = true; | |||
329 | continue; | |||
330 | } | |||
331 | // If Group is empty, keep looking for first CMOV in the range. | |||
332 | if (Group.empty()) | |||
333 | continue; | |||
334 | ||||
335 | // We found a non X86::CMOVrr instruction. | |||
336 | FoundNonCMOVInst = true; | |||
337 | // Check if this instruction define EFLAGS, to determine end of processed | |||
338 | // range, as there would be no more instructions using current EFLAGS def. | |||
339 | if (I.definesRegister(X86::EFLAGS)) { | |||
340 | // Check if current processed CMOV-group should not be skipped and add | |||
341 | // it as a CMOV-group-candidate. | |||
342 | if (!SkipGroup) | |||
343 | CmovInstGroups.push_back(Group); | |||
344 | else | |||
345 | ++NumOfSkippedCmovGroups; | |||
346 | Group.clear(); | |||
347 | } | |||
348 | } | |||
349 | // End of basic block is considered end of range, check if current processed | |||
350 | // CMOV-group should not be skipped and add it as a CMOV-group-candidate. | |||
351 | if (Group.empty()) | |||
352 | continue; | |||
353 | if (!SkipGroup) | |||
354 | CmovInstGroups.push_back(Group); | |||
355 | else | |||
356 | ++NumOfSkippedCmovGroups; | |||
357 | } | |||
358 | ||||
359 | NumOfCmovGroupCandidate += CmovInstGroups.size(); | |||
360 | return !CmovInstGroups.empty(); | |||
361 | } | |||
362 | ||||
363 | /// \returns Depth of CMOV instruction as if it was converted into branch. | |||
364 | /// \param TrueOpDepth depth cost of CMOV true value operand. | |||
365 | /// \param FalseOpDepth depth cost of CMOV false value operand. | |||
366 | static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) { | |||
367 | //===--------------------------------------------------------------------===// | |||
368 | // With no info about branch weight, we assume 50% for each value operand. | |||
369 | // Thus, depth of optimized CMOV instruction is the rounded up average of | |||
370 | // its True-Operand-Value-Depth and False-Operand-Value-Depth. | |||
371 | //===--------------------------------------------------------------------===// | |||
372 | return (TrueOpDepth + FalseOpDepth + 1) / 2; | |||
373 | } | |||
374 | ||||
375 | bool X86CmovConverterPass::checkForProfitableCmovCandidates( | |||
376 | ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) { | |||
377 | struct DepthInfo { | |||
378 | /// Depth of original loop. | |||
379 | unsigned Depth; | |||
380 | /// Depth of optimized loop. | |||
381 | unsigned OptDepth; | |||
382 | }; | |||
383 | /// Number of loop iterations to calculate depth for ?! | |||
384 | static const unsigned LoopIterations = 2; | |||
385 | DenseMap<MachineInstr *, DepthInfo> DepthMap; | |||
386 | DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}}; | |||
387 | enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 }; | |||
388 | /// For each register type maps the register to its last def instruction. | |||
389 | DenseMap<unsigned, MachineInstr *> RegDefMaps[RegTypeNum]; | |||
390 | /// Maps register operand to its def instruction, which can be nullptr if it | |||
391 | /// is unknown (e.g., operand is defined outside the loop). | |||
392 | DenseMap<MachineOperand *, MachineInstr *> OperandToDefMap; | |||
393 | ||||
394 | // Set depth of unknown instruction (i.e., nullptr) to zero. | |||
395 | DepthMap[nullptr] = {0, 0}; | |||
396 | ||||
397 | SmallPtrSet<MachineInstr *, 4> CmovInstructions; | |||
398 | for (auto &Group : CmovInstGroups) | |||
399 | CmovInstructions.insert(Group.begin(), Group.end()); | |||
400 | ||||
401 | //===--------------------------------------------------------------------===// | |||
402 | // Step 1: Calculate instruction depth and loop depth. | |||
403 | // Optimized-Loop: | |||
404 | // loop with CMOV-group-candidates converted into branches. | |||
405 | // | |||
406 | // Instruction-Depth: | |||
407 | // instruction latency + max operand depth. | |||
408 | // * For CMOV instruction in optimized loop the depth is calculated as: | |||
409 | // CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-depth) | |||
410 | // TODO: Find a better way to estimate the latency of the branch instruction | |||
411 | // rather than using the CMOV latency. | |||
412 | // | |||
413 | // Loop-Depth: | |||
414 | // max instruction depth of all instructions in the loop. | |||
415 | // Note: instruction with max depth represents the critical-path in the loop. | |||
416 | // | |||
417 | // Loop-Depth[i]: | |||
418 | // Loop-Depth calculated for first `i` iterations. | |||
419 | // Note: it is enough to calculate depth for up to two iterations. | |||
420 | // | |||
421 | // Depth-Diff[i]: | |||
422 | // Number of cycles saved in first 'i` iterations by optimizing the loop. | |||
423 | //===--------------------------------------------------------------------===// | |||
424 | for (unsigned I = 0; I < LoopIterations; ++I) { | |||
425 | DepthInfo &MaxDepth = LoopDepth[I]; | |||
426 | for (auto *MBB : Blocks) { | |||
427 | // Clear physical registers Def map. | |||
428 | RegDefMaps[PhyRegType].clear(); | |||
429 | for (MachineInstr &MI : *MBB) { | |||
430 | // Skip debug instructions. | |||
431 | if (MI.isDebugInstr()) | |||
432 | continue; | |||
433 | unsigned MIDepth = 0; | |||
434 | unsigned MIDepthOpt = 0; | |||
435 | bool IsCMOV = CmovInstructions.count(&MI); | |||
436 | for (auto &MO : MI.uses()) { | |||
437 | // Checks for "isUse()" as "uses()" returns also implicit definitions. | |||
438 | if (!MO.isReg() || !MO.isUse()) | |||
439 | continue; | |||
440 | unsigned Reg = MO.getReg(); | |||
441 | auto &RDM = RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)]; | |||
442 | if (MachineInstr *DefMI = RDM.lookup(Reg)) { | |||
443 | OperandToDefMap[&MO] = DefMI; | |||
444 | DepthInfo Info = DepthMap.lookup(DefMI); | |||
445 | MIDepth = std::max(MIDepth, Info.Depth); | |||
446 | if (!IsCMOV) | |||
447 | MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth); | |||
448 | } | |||
449 | } | |||
450 | ||||
451 | if (IsCMOV) | |||
452 | MIDepthOpt = getDepthOfOptCmov( | |||
453 | DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth, | |||
454 | DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth); | |||
455 | ||||
456 | // Iterates over all operands to handle implicit definitions as well. | |||
457 | for (auto &MO : MI.operands()) { | |||
458 | if (!MO.isReg() || !MO.isDef()) | |||
459 | continue; | |||
460 | unsigned Reg = MO.getReg(); | |||
461 | RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)][Reg] = &MI; | |||
462 | } | |||
463 | ||||
464 | unsigned Latency = TSchedModel.computeInstrLatency(&MI); | |||
465 | DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency}; | |||
466 | MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth); | |||
467 | MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt); | |||
468 | } | |||
469 | } | |||
470 | } | |||
471 | ||||
472 | unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth, | |||
473 | LoopDepth[1].Depth - LoopDepth[1].OptDepth}; | |||
474 | ||||
475 | //===--------------------------------------------------------------------===// | |||
476 | // Step 2: Check if Loop worth to be optimized. | |||
477 | // Worth-Optimize-Loop: | |||
478 | // case 1: Diff[1] == Diff[0] | |||
479 | // Critical-path is iteration independent - there is no dependency | |||
480 | // of critical-path instructions on critical-path instructions of | |||
481 | // previous iteration. | |||
482 | // Thus, it is enough to check gain percent of 1st iteration - | |||
483 | // To be conservative, the optimized loop need to have a depth of | |||
484 | // 12.5% cycles less than original loop, per iteration. | |||
485 | // | |||
486 | // case 2: Diff[1] > Diff[0] | |||
487 | // Critical-path is iteration dependent - there is dependency of | |||
488 | // critical-path instructions on critical-path instructions of | |||
489 | // previous iteration. | |||
490 | // Thus, check the gain percent of the 2nd iteration (similar to the | |||
491 | // previous case), but it is also required to check the gradient of | |||
492 | // the gain - the change in Depth-Diff compared to the change in | |||
493 | // Loop-Depth between 1st and 2nd iterations. | |||
494 | // To be conservative, the gradient need to be at least 50%. | |||
495 | // | |||
496 | // In addition, In order not to optimize loops with very small gain, the | |||
497 | // gain (in cycles) after 2nd iteration should not be less than a given | |||
498 | // threshold. Thus, the check (Diff[1] >= GainCycleThreshold) must apply. | |||
499 | // | |||
500 | // If loop is not worth optimizing, remove all CMOV-group-candidates. | |||
501 | //===--------------------------------------------------------------------===// | |||
502 | if (Diff[1] < GainCycleThreshold) | |||
503 | return false; | |||
504 | ||||
505 | bool WorthOptLoop = false; | |||
506 | if (Diff[1] == Diff[0]) | |||
507 | WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth; | |||
508 | else if (Diff[1] > Diff[0]) | |||
509 | WorthOptLoop = | |||
510 | (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) && | |||
511 | (Diff[1] * 8 >= LoopDepth[1].Depth); | |||
512 | ||||
513 | if (!WorthOptLoop) | |||
514 | return false; | |||
515 | ||||
516 | ++NumOfLoopCandidate; | |||
517 | ||||
518 | //===--------------------------------------------------------------------===// | |||
519 | // Step 3: Check for each CMOV-group-candidate if it worth to be optimized. | |||
520 | // Worth-Optimize-Group: | |||
521 | // Iff it worths to optimize all CMOV instructions in the group. | |||
522 | // | |||
523 | // Worth-Optimize-CMOV: | |||
524 | // Predicted branch is faster than CMOV by the difference between depth of | |||
525 | // condition operand and depth of taken (predicted) value operand. | |||
526 | // To be conservative, the gain of such CMOV transformation should cover at | |||
527 | // at least 25% of branch-misprediction-penalty. | |||
528 | //===--------------------------------------------------------------------===// | |||
529 | unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; | |||
530 | CmovGroups TempGroups; | |||
531 | std::swap(TempGroups, CmovInstGroups); | |||
532 | for (auto &Group : TempGroups) { | |||
533 | bool WorthOpGroup = true; | |||
534 | for (auto *MI : Group) { | |||
535 | // Avoid CMOV instruction which value is used as a pointer to load from. | |||
536 | // This is another conservative check to avoid converting CMOV instruction | |||
537 | // used with tree-search like algorithm, where the branch is unpredicted. | |||
538 | auto UIs = MRI->use_instructions(MI->defs().begin()->getReg()); | |||
539 | if (UIs.begin() != UIs.end() && ++UIs.begin() == UIs.end()) { | |||
540 | unsigned Op = UIs.begin()->getOpcode(); | |||
541 | if (Op == X86::MOV64rm || Op == X86::MOV32rm) { | |||
542 | WorthOpGroup = false; | |||
543 | break; | |||
544 | } | |||
545 | } | |||
546 | ||||
547 | unsigned CondCost = | |||
548 | DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth; | |||
549 | unsigned ValCost = getDepthOfOptCmov( | |||
550 | DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth, | |||
551 | DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth); | |||
552 | if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) { | |||
553 | WorthOpGroup = false; | |||
554 | break; | |||
555 | } | |||
556 | } | |||
557 | ||||
558 | if (WorthOpGroup) | |||
559 | CmovInstGroups.push_back(Group); | |||
560 | } | |||
561 | ||||
562 | return !CmovInstGroups.empty(); | |||
563 | } | |||
564 | ||||
565 | static bool checkEFLAGSLive(MachineInstr *MI) { | |||
566 | if (MI->killsRegister(X86::EFLAGS)) | |||
567 | return false; | |||
568 | ||||
569 | // The EFLAGS operand of MI might be missing a kill marker. | |||
570 | // Figure out whether EFLAGS operand should LIVE after MI instruction. | |||
571 | MachineBasicBlock *BB = MI->getParent(); | |||
572 | MachineBasicBlock::iterator ItrMI = MI; | |||
573 | ||||
574 | // Scan forward through BB for a use/def of EFLAGS. | |||
575 | for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) { | |||
576 | if (I->readsRegister(X86::EFLAGS)) | |||
577 | return true; | |||
578 | if (I->definesRegister(X86::EFLAGS)) | |||
579 | return false; | |||
580 | } | |||
581 | ||||
582 | // We hit the end of the block, check whether EFLAGS is live into a successor. | |||
583 | for (auto I = BB->succ_begin(), E = BB->succ_end(); I != E; ++I) { | |||
584 | if ((*I)->isLiveIn(X86::EFLAGS)) | |||
585 | return true; | |||
586 | } | |||
587 | ||||
588 | return false; | |||
589 | } | |||
590 | ||||
591 | /// Given /p First CMOV instruction and /p Last CMOV instruction representing a | |||
592 | /// group of CMOV instructions, which may contain debug instructions in between, | |||
593 | /// move all debug instructions to after the last CMOV instruction, making the | |||
594 | /// CMOV group consecutive. | |||
595 | static void packCmovGroup(MachineInstr *First, MachineInstr *Last) { | |||
596 | assert(X86::getCondFromCMov(*Last) != X86::COND_INVALID &&((X86::getCondFromCMov(*Last) != X86::COND_INVALID && "Last instruction in a CMOV group must be a CMOV instruction" ) ? static_cast<void> (0) : __assert_fail ("X86::getCondFromCMov(*Last) != X86::COND_INVALID && \"Last instruction in a CMOV group must be a CMOV instruction\"" , "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86CmovConversion.cpp" , 597, __PRETTY_FUNCTION__)) | |||
597 | "Last instruction in a CMOV group must be a CMOV instruction")((X86::getCondFromCMov(*Last) != X86::COND_INVALID && "Last instruction in a CMOV group must be a CMOV instruction" ) ? static_cast<void> (0) : __assert_fail ("X86::getCondFromCMov(*Last) != X86::COND_INVALID && \"Last instruction in a CMOV group must be a CMOV instruction\"" , "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86CmovConversion.cpp" , 597, __PRETTY_FUNCTION__)); | |||
598 | ||||
599 | SmallVector<MachineInstr *, 2> DBGInstructions; | |||
600 | for (auto I = First->getIterator(), E = Last->getIterator(); I != E; I++) { | |||
601 | if (I->isDebugInstr()) | |||
602 | DBGInstructions.push_back(&*I); | |||
603 | } | |||
604 | ||||
605 | // Splice the debug instruction after the cmov group. | |||
606 | MachineBasicBlock *MBB = First->getParent(); | |||
607 | for (auto *MI : DBGInstructions) | |||
608 | MBB->insertAfter(Last, MI->removeFromParent()); | |||
609 | } | |||
610 | ||||
611 | void X86CmovConverterPass::convertCmovInstsToBranches( | |||
612 | SmallVectorImpl<MachineInstr *> &Group) const { | |||
613 | assert(!Group.empty() && "No CMOV instructions to convert")((!Group.empty() && "No CMOV instructions to convert" ) ? static_cast<void> (0) : __assert_fail ("!Group.empty() && \"No CMOV instructions to convert\"" , "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86CmovConversion.cpp" , 613, __PRETTY_FUNCTION__)); | |||
614 | ++NumOfOptimizedCmovGroups; | |||
615 | ||||
616 | // If the CMOV group is not packed, e.g., there are debug instructions between | |||
617 | // first CMOV and last CMOV, then pack the group and make the CMOV instruction | |||
618 | // consecutive by moving the debug instructions to after the last CMOV. | |||
619 | packCmovGroup(Group.front(), Group.back()); | |||
620 | ||||
621 | // To convert a CMOVcc instruction, we actually have to insert the diamond | |||
622 | // control-flow pattern. The incoming instruction knows the destination vreg | |||
623 | // to set, the condition code register to branch on, the true/false values to | |||
624 | // select between, and a branch opcode to use. | |||
625 | ||||
626 | // Before | |||
627 | // ----- | |||
628 | // MBB: | |||
629 | // cond = cmp ... | |||
630 | // v1 = CMOVge t1, f1, cond | |||
631 | // v2 = CMOVlt t2, f2, cond | |||
632 | // v3 = CMOVge v1, f3, cond | |||
633 | // | |||
634 | // After | |||
635 | // ----- | |||
636 | // MBB: | |||
637 | // cond = cmp ... | |||
638 | // jge %SinkMBB | |||
639 | // | |||
640 | // FalseMBB: | |||
641 | // jmp %SinkMBB | |||
642 | // | |||
643 | // SinkMBB: | |||
644 | // %v1 = phi[%f1, %FalseMBB], [%t1, %MBB] | |||
645 | // %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch | |||
646 | // ; true-value with false-value | |||
647 | // %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use | |||
648 | // ; previous Phi instruction result | |||
649 | ||||
650 | MachineInstr &MI = *Group.front(); | |||
651 | MachineInstr *LastCMOV = Group.back(); | |||
652 | DebugLoc DL = MI.getDebugLoc(); | |||
653 | ||||
654 | X86::CondCode CC = X86::CondCode(X86::getCondFromCMov(MI)); | |||
655 | X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); | |||
656 | // Potentially swap the condition codes so that any memory operand to a CMOV | |||
657 | // is in the *false* position instead of the *true* position. We can invert | |||
658 | // any non-memory operand CMOV instructions to cope with this and we ensure | |||
659 | // memory operand CMOVs are only included with a single condition code. | |||
660 | if (llvm::any_of(Group, [&](MachineInstr *I) { | |||
661 | return I->mayLoad() && X86::getCondFromCMov(*I) == CC; | |||
662 | })) | |||
663 | std::swap(CC, OppCC); | |||
664 | ||||
665 | MachineBasicBlock *MBB = MI.getParent(); | |||
666 | MachineFunction::iterator It = ++MBB->getIterator(); | |||
667 | MachineFunction *F = MBB->getParent(); | |||
668 | const BasicBlock *BB = MBB->getBasicBlock(); | |||
669 | ||||
670 | MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB); | |||
671 | MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB); | |||
672 | F->insert(It, FalseMBB); | |||
673 | F->insert(It, SinkMBB); | |||
674 | ||||
675 | // If the EFLAGS register isn't dead in the terminator, then claim that it's | |||
676 | // live into the sink and copy blocks. | |||
677 | if (checkEFLAGSLive(LastCMOV)) { | |||
678 | FalseMBB->addLiveIn(X86::EFLAGS); | |||
679 | SinkMBB->addLiveIn(X86::EFLAGS); | |||
680 | } | |||
681 | ||||
682 | // Transfer the remainder of BB and its successor edges to SinkMBB. | |||
683 | SinkMBB->splice(SinkMBB->begin(), MBB, | |||
684 | std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end()); | |||
685 | SinkMBB->transferSuccessorsAndUpdatePHIs(MBB); | |||
686 | ||||
687 | // Add the false and sink blocks as its successors. | |||
688 | MBB->addSuccessor(FalseMBB); | |||
689 | MBB->addSuccessor(SinkMBB); | |||
690 | ||||
691 | // Create the conditional branch instruction. | |||
692 | BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC); | |||
693 | ||||
694 | // Add the sink block to the false block successors. | |||
695 | FalseMBB->addSuccessor(SinkMBB); | |||
696 | ||||
697 | MachineInstrBuilder MIB; | |||
698 | MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI); | |||
699 | MachineBasicBlock::iterator MIItEnd = | |||
700 | std::next(MachineBasicBlock::iterator(LastCMOV)); | |||
701 | MachineBasicBlock::iterator FalseInsertionPoint = FalseMBB->begin(); | |||
702 | MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); | |||
703 | ||||
704 | // First we need to insert an explicit load on the false path for any memory | |||
705 | // operand. We also need to potentially do register rewriting here, but it is | |||
706 | // simpler as the memory operands are always on the false path so we can | |||
707 | // simply take that input, whatever it is. | |||
708 | DenseMap<unsigned, unsigned> FalseBBRegRewriteTable; | |||
709 | for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;) { | |||
710 | auto &MI = *MIIt++; | |||
711 | // Skip any CMOVs in this group which don't load from memory. | |||
712 | if (!MI.mayLoad()) { | |||
713 | // Remember the false-side register input. | |||
714 | unsigned FalseReg = | |||
715 | MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg(); | |||
716 | // Walk back through any intermediate cmovs referenced. | |||
717 | while (true) { | |||
718 | auto FRIt = FalseBBRegRewriteTable.find(FalseReg); | |||
719 | if (FRIt == FalseBBRegRewriteTable.end()) | |||
720 | break; | |||
721 | FalseReg = FRIt->second; | |||
722 | } | |||
723 | FalseBBRegRewriteTable[MI.getOperand(0).getReg()] = FalseReg; | |||
724 | continue; | |||
725 | } | |||
726 | ||||
727 | // The condition must be the *opposite* of the one we've decided to branch | |||
728 | // on as the branch will go *around* the load and the load should happen | |||
729 | // when the CMOV condition is false. | |||
730 | assert(X86::getCondFromCMov(MI) == OppCC &&((X86::getCondFromCMov(MI) == OppCC && "Can only handle memory-operand cmov instructions with a condition " "opposite to the selected branch direction.") ? static_cast< void> (0) : __assert_fail ("X86::getCondFromCMov(MI) == OppCC && \"Can only handle memory-operand cmov instructions with a condition \" \"opposite to the selected branch direction.\"" , "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86CmovConversion.cpp" , 732, __PRETTY_FUNCTION__)) | |||
731 | "Can only handle memory-operand cmov instructions with a condition "((X86::getCondFromCMov(MI) == OppCC && "Can only handle memory-operand cmov instructions with a condition " "opposite to the selected branch direction.") ? static_cast< void> (0) : __assert_fail ("X86::getCondFromCMov(MI) == OppCC && \"Can only handle memory-operand cmov instructions with a condition \" \"opposite to the selected branch direction.\"" , "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86CmovConversion.cpp" , 732, __PRETTY_FUNCTION__)) | |||
732 | "opposite to the selected branch direction.")((X86::getCondFromCMov(MI) == OppCC && "Can only handle memory-operand cmov instructions with a condition " "opposite to the selected branch direction.") ? static_cast< void> (0) : __assert_fail ("X86::getCondFromCMov(MI) == OppCC && \"Can only handle memory-operand cmov instructions with a condition \" \"opposite to the selected branch direction.\"" , "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86CmovConversion.cpp" , 732, __PRETTY_FUNCTION__)); | |||
733 | ||||
734 | // The goal is to rewrite the cmov from: | |||
735 | // | |||
736 | // MBB: | |||
737 | // %A = CMOVcc %B (tied), (mem) | |||
738 | // | |||
739 | // to | |||
740 | // | |||
741 | // MBB: | |||
742 | // %A = CMOVcc %B (tied), %C | |||
743 | // FalseMBB: | |||
744 | // %C = MOV (mem) | |||
745 | // | |||
746 | // Which will allow the next loop to rewrite the CMOV in terms of a PHI: | |||
747 | // | |||
748 | // MBB: | |||
749 | // JMP!cc SinkMBB | |||
750 | // FalseMBB: | |||
751 | // %C = MOV (mem) | |||
752 | // SinkMBB: | |||
753 | // %A = PHI [ %C, FalseMBB ], [ %B, MBB] | |||
754 | ||||
755 | // Get a fresh register to use as the destination of the MOV. | |||
756 | const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg()); | |||
757 | unsigned TmpReg = MRI->createVirtualRegister(RC); | |||
758 | ||||
759 | SmallVector<MachineInstr *, 4> NewMIs; | |||
760 | bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg, | |||
761 | /*UnfoldLoad*/ true, | |||
762 | /*UnfoldStore*/ false, NewMIs); | |||
763 | (void)Unfolded; | |||
764 | assert(Unfolded && "Should never fail to unfold a loading cmov!")((Unfolded && "Should never fail to unfold a loading cmov!" ) ? static_cast<void> (0) : __assert_fail ("Unfolded && \"Should never fail to unfold a loading cmov!\"" , "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86CmovConversion.cpp" , 764, __PRETTY_FUNCTION__)); | |||
765 | ||||
766 | // Move the new CMOV to just before the old one and reset any impacted | |||
767 | // iterator. | |||
768 | auto *NewCMOV = NewMIs.pop_back_val(); | |||
769 | assert(X86::getCondFromCMov(*NewCMOV) == OppCC &&((X86::getCondFromCMov(*NewCMOV) == OppCC && "Last new instruction isn't the expected CMOV!" ) ? static_cast<void> (0) : __assert_fail ("X86::getCondFromCMov(*NewCMOV) == OppCC && \"Last new instruction isn't the expected CMOV!\"" , "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86CmovConversion.cpp" , 770, __PRETTY_FUNCTION__)) | |||
770 | "Last new instruction isn't the expected CMOV!")((X86::getCondFromCMov(*NewCMOV) == OppCC && "Last new instruction isn't the expected CMOV!" ) ? static_cast<void> (0) : __assert_fail ("X86::getCondFromCMov(*NewCMOV) == OppCC && \"Last new instruction isn't the expected CMOV!\"" , "/build/llvm-toolchain-snapshot-9~svn361465/lib/Target/X86/X86CmovConversion.cpp" , 770, __PRETTY_FUNCTION__)); | |||
771 | LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-cmov-conversion")) { dbgs() << "\tRewritten cmov: " ; NewCMOV->dump(); } } while (false); | |||
772 | MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV); | |||
773 | if (&*MIItBegin == &MI) | |||
774 | MIItBegin = MachineBasicBlock::iterator(NewCMOV); | |||
775 | ||||
776 | // Sink whatever instructions were needed to produce the unfolded operand | |||
777 | // into the false block. | |||
778 | for (auto *NewMI : NewMIs) { | |||
779 | LLVM_DEBUG(dbgs() << "\tRewritten load instr: "; NewMI->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-cmov-conversion")) { dbgs() << "\tRewritten load instr: " ; NewMI->dump(); } } while (false); | |||
780 | FalseMBB->insert(FalseInsertionPoint, NewMI); | |||
781 | // Re-map any operands that are from other cmovs to the inputs for this block. | |||
782 | for (auto &MOp : NewMI->uses()) { | |||
783 | if (!MOp.isReg()) | |||
784 | continue; | |||
785 | auto It = FalseBBRegRewriteTable.find(MOp.getReg()); | |||
786 | if (It == FalseBBRegRewriteTable.end()) | |||
787 | continue; | |||
788 | ||||
789 | MOp.setReg(It->second); | |||
790 | // This might have been a kill when it referenced the cmov result, but | |||
791 | // it won't necessarily be once rewritten. | |||
792 | // FIXME: We could potentially improve this by tracking whether the | |||
793 | // operand to the cmov was also a kill, and then skipping the PHI node | |||
794 | // construction below. | |||
795 | MOp.setIsKill(false); | |||
796 | } | |||
797 | } | |||
798 | MBB->erase(MachineBasicBlock::iterator(MI), | |||
799 | std::next(MachineBasicBlock::iterator(MI))); | |||
800 | ||||
801 | // Add this PHI to the rewrite table. | |||
802 | FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg; | |||
803 | } | |||
804 | ||||
805 | // As we are creating the PHIs, we have to be careful if there is more than | |||
806 | // one. Later CMOVs may reference the results of earlier CMOVs, but later | |||
807 | // PHIs have to reference the individual true/false inputs from earlier PHIs. | |||
808 | // That also means that PHI construction must work forward from earlier to | |||
809 | // later, and that the code must maintain a mapping from earlier PHI's | |||
810 | // destination registers, and the registers that went into the PHI. | |||
811 | DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable; | |||
812 | ||||
813 | for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { | |||
814 | unsigned DestReg = MIIt->getOperand(0).getReg(); | |||
815 | unsigned Op1Reg = MIIt->getOperand(1).getReg(); | |||
816 | unsigned Op2Reg = MIIt->getOperand(2).getReg(); | |||
817 | ||||
818 | // If this CMOV we are processing is the opposite condition from the jump we | |||
819 | // generated, then we have to swap the operands for the PHI that is going to | |||
820 | // be generated. | |||
821 | if (X86::getCondFromCMov(*MIIt) == OppCC) | |||
822 | std::swap(Op1Reg, Op2Reg); | |||
823 | ||||
824 | auto Op1Itr = RegRewriteTable.find(Op1Reg); | |||
825 | if (Op1Itr != RegRewriteTable.end()) | |||
826 | Op1Reg = Op1Itr->second.first; | |||
827 | ||||
828 | auto Op2Itr = RegRewriteTable.find(Op2Reg); | |||
829 | if (Op2Itr != RegRewriteTable.end()) | |||
830 | Op2Reg = Op2Itr->second.second; | |||
831 | ||||
832 | // SinkMBB: | |||
833 | // %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ] | |||
834 | // ... | |||
835 | MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg) | |||
836 | .addReg(Op1Reg) | |||
837 | .addMBB(FalseMBB) | |||
838 | .addReg(Op2Reg) | |||
839 | .addMBB(MBB); | |||
840 | (void)MIB; | |||
841 | LLVM_DEBUG(dbgs() << "\tFrom: "; MIIt->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-cmov-conversion")) { dbgs() << "\tFrom: "; MIIt-> dump(); } } while (false); | |||
842 | LLVM_DEBUG(dbgs() << "\tTo: "; MIB->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-cmov-conversion")) { dbgs() << "\tTo: "; MIB-> dump(); } } while (false); | |||
843 | ||||
844 | // Add this PHI to the rewrite table. | |||
845 | RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg); | |||
846 | } | |||
847 | ||||
848 | // Now remove the CMOV(s). | |||
849 | MBB->erase(MIItBegin, MIItEnd); | |||
850 | } | |||
851 | ||||
852 | INITIALIZE_PASS_BEGIN(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",static void *initializeX86CmovConverterPassPassOnce(PassRegistry &Registry) { | |||
853 | false, false)static void *initializeX86CmovConverterPassPassOnce(PassRegistry &Registry) { | |||
854 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)initializeMachineLoopInfoPass(Registry); | |||
855 | INITIALIZE_PASS_END(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",PassInfo *PI = new PassInfo( "X86 cmov Conversion", "x86-cmov-conversion" , &X86CmovConverterPass::ID, PassInfo::NormalCtor_t(callDefaultCtor <X86CmovConverterPass>), false, false); Registry.registerPass (*PI, true); return PI; } static llvm::once_flag InitializeX86CmovConverterPassPassFlag ; void llvm::initializeX86CmovConverterPassPass(PassRegistry & Registry) { llvm::call_once(InitializeX86CmovConverterPassPassFlag , initializeX86CmovConverterPassPassOnce, std::ref(Registry)) ; } | |||
856 | false, false)PassInfo *PI = new PassInfo( "X86 cmov Conversion", "x86-cmov-conversion" , &X86CmovConverterPass::ID, PassInfo::NormalCtor_t(callDefaultCtor <X86CmovConverterPass>), false, false); Registry.registerPass (*PI, true); return PI; } static llvm::once_flag InitializeX86CmovConverterPassPassFlag ; void llvm::initializeX86CmovConverterPassPass(PassRegistry & Registry) { llvm::call_once(InitializeX86CmovConverterPassPassFlag , initializeX86CmovConverterPassPassOnce, std::ref(Registry)) ; } | |||
857 | ||||
858 | FunctionPass *llvm::createX86CmovConverterPass() { | |||
859 | return new X86CmovConverterPass(); | |||
860 | } |