LLVM 19.0.0git
X86FlagsCopyLowering.cpp
Go to the documentation of this file.
1//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
11/// flag bits.
12///
13/// We have to do this by carefully analyzing and rewriting the usage of the
14/// copied EFLAGS register because there is no general way to rematerialize the
15/// entire EFLAGS register safely and efficiently. Using `popf` both forces
16/// dynamic stack adjustment and can create correctness issues due to IF, TF,
17/// and other non-status flags being overwritten. Sequences involving
18/// SAHF don't work on all x86 processors and are often quite slow compared to
19/// directly testing a single status preserved in its own GPR.
20///
21//===----------------------------------------------------------------------===//
22
23#include "X86.h"
24#include "X86InstrBuilder.h"
25#include "X86InstrInfo.h"
26#include "X86Subtarget.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/ScopeExit.h"
32#include "llvm/ADT/Statistic.h"
48#include "llvm/IR/DebugLoc.h"
49#include "llvm/MC/MCSchedule.h"
50#include "llvm/Pass.h"
52#include "llvm/Support/Debug.h"
54#include <algorithm>
55#include <cassert>
56#include <iterator>
57#include <utility>
58
59using namespace llvm;
60
61#define PASS_KEY "x86-flags-copy-lowering"
62#define DEBUG_TYPE PASS_KEY
63
64STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
65STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
66STATISTIC(NumTestsInserted, "Number of test instructions inserted");
67STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
68
69namespace {
70
71// Convenient array type for storing registers associated with each condition.
72using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;
73
74class X86FlagsCopyLoweringPass : public MachineFunctionPass {
75public:
76 X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {}
77
78 StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
79 bool runOnMachineFunction(MachineFunction &MF) override;
80 void getAnalysisUsage(AnalysisUsage &AU) const override;
81
82 /// Pass identification, replacement for typeid.
83 static char ID;
84
85private:
86 MachineRegisterInfo *MRI = nullptr;
87 const X86Subtarget *Subtarget = nullptr;
88 const X86InstrInfo *TII = nullptr;
89 const TargetRegisterInfo *TRI = nullptr;
90 const TargetRegisterClass *PromoteRC = nullptr;
91 MachineDominatorTree *MDT = nullptr;
92
93 CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
95
96 Register promoteCondToReg(MachineBasicBlock &MBB,
98 const DebugLoc &TestLoc, X86::CondCode Cond);
99 std::pair<unsigned, bool> getCondOrInverseInReg(
101 const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs);
103 const DebugLoc &Loc, unsigned Reg);
104
105 void rewriteSetCC(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
106 const DebugLoc &Loc, MachineInstr &MI,
107 CondRegArray &CondRegs);
108 void rewriteArithmetic(MachineBasicBlock &MBB,
110 MachineInstr &MI, CondRegArray &CondRegs);
112 const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs);
113};
114
115} // end anonymous namespace
116
117INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
118 "X86 EFLAGS copy lowering", false, false)
119INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
120 "X86 EFLAGS copy lowering", false, false)
121
123 return new X86FlagsCopyLoweringPass();
124}
125
126char X86FlagsCopyLoweringPass::ID = 0;
127
128void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
131}
132
133static bool isArithmeticOp(unsigned Opc) {
134 return X86::isADC(Opc) || X86::isSBB(Opc) || X86::isRCL(Opc) ||
135 X86::isRCR(Opc) || (Opc == X86::SETB_C32r || Opc == X86::SETB_C64r);
136}
137
139 MachineInstr &SplitI,
140 const X86InstrInfo &TII) {
142
143 assert(SplitI.getParent() == &MBB &&
144 "Split instruction must be in the split block!");
145 assert(SplitI.isBranch() &&
146 "Only designed to split a tail of branch instructions!");
148 "Must split on an actual jCC instruction!");
149
150 // Dig out the previous instruction to the split point.
151 MachineInstr &PrevI = *std::prev(SplitI.getIterator());
152 assert(PrevI.isBranch() && "Must split after a branch!");
154 "Must split after an actual jCC instruction!");
155 assert(!std::prev(PrevI.getIterator())->isTerminator() &&
156 "Must only have this one terminator prior to the split!");
157
158 // Grab the one successor edge that will stay in `MBB`.
159 MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();
160
161 // Analyze the original block to see if we are actually splitting an edge
162 // into two edges. This can happen when we have multiple conditional jumps to
163 // the same successor.
164 bool IsEdgeSplit =
165 std::any_of(SplitI.getIterator(), MBB.instr_end(),
166 [&](MachineInstr &MI) {
167 assert(MI.isTerminator() &&
168 "Should only have spliced terminators!");
169 return llvm::any_of(
170 MI.operands(), [&](MachineOperand &MOp) {
171 return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
172 });
173 }) ||
174 MBB.getFallThrough() == &UnsplitSucc;
175
176 MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
177
178 // Insert the new block immediately after the current one. Any existing
179 // fallthrough will be sunk into this new block anyways.
180 MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
181
182 // Splice the tail of instructions into the new block.
183 NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());
184
185 // Copy the necessary successors (and their probability info) into the new
186 // block.
187 for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
188 if (IsEdgeSplit || *SI != &UnsplitSucc)
189 NewMBB.copySuccessor(&MBB, SI);
190 // Normalize the probabilities if we didn't end up splitting the edge.
191 if (!IsEdgeSplit)
192 NewMBB.normalizeSuccProbs();
193
194 // Now replace all of the moved successors in the original block with the new
195 // block. This will merge their probabilities.
196 for (MachineBasicBlock *Succ : NewMBB.successors())
197 if (Succ != &UnsplitSucc)
198 MBB.replaceSuccessor(Succ, &NewMBB);
199
200 // We should always end up replacing at least one successor.
201 assert(MBB.isSuccessor(&NewMBB) &&
202 "Failed to make the new block a successor!");
203
204 // Now update all the PHIs.
205 for (MachineBasicBlock *Succ : NewMBB.successors()) {
206 for (MachineInstr &MI : *Succ) {
207 if (!MI.isPHI())
208 break;
209
210 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
211 OpIdx += 2) {
212 MachineOperand &OpV = MI.getOperand(OpIdx);
213 MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
214 assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
215 if (OpMBB.getMBB() != &MBB)
216 continue;
217
218 // Replace the operand for unsplit successors
219 if (!IsEdgeSplit || Succ != &UnsplitSucc) {
220 OpMBB.setMBB(&NewMBB);
221
222 // We have to continue scanning as there may be multiple entries in
223 // the PHI.
224 continue;
225 }
226
227 // When we have split the edge append a new successor.
228 MI.addOperand(MF, OpV);
229 MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
230 break;
231 }
232 }
233 }
234
235 return NewMBB;
236}
237
238bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
239 LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
240 << " **********\n");
241
242 Subtarget = &MF.getSubtarget<X86Subtarget>();
243 MRI = &MF.getRegInfo();
244 TII = Subtarget->getInstrInfo();
245 TRI = Subtarget->getRegisterInfo();
246 MDT = &getAnalysis<MachineDominatorTree>();
247 PromoteRC = &X86::GR8RegClass;
248
249 if (MF.empty())
250 // Nothing to do for a degenerate empty function...
251 return false;
252
253 // Collect the copies in RPO so that when there are chains where a copy is in
254 // turn copied again we visit the first one first. This ensures we can find
255 // viable locations for testing the original EFLAGS that dominate all the
256 // uses across complex CFGs.
259 for (MachineBasicBlock *MBB : RPOT)
260 for (MachineInstr &MI : *MBB)
261 if (MI.getOpcode() == TargetOpcode::COPY &&
262 MI.getOperand(0).getReg() == X86::EFLAGS)
263 Copies.push_back(&MI);
264
265 for (MachineInstr *CopyI : Copies) {
266 MachineBasicBlock &MBB = *CopyI->getParent();
267
268 MachineOperand &VOp = CopyI->getOperand(1);
269 assert(VOp.isReg() &&
270 "The input to the copy for EFLAGS should always be a register!");
271 MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
272 if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
273 // FIXME: The big likely candidate here are PHI nodes. We could in theory
274 // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
275 // enough that it is probably better to change every other part of LLVM
276 // to avoid creating them. The issue is that once we have PHIs we won't
277 // know which original EFLAGS value we need to capture with our setCCs
278 // below. The end result will be computing a complete set of setCCs that
279 // we *might* want, computing them in every place where we copy *out* of
280 // EFLAGS and then doing SSA formation on all of them to insert necessary
281 // PHI nodes and consume those here. Then hoping that somehow we DCE the
282 // unnecessary ones. This DCE seems very unlikely to be successful and so
283 // we will almost certainly end up with a glut of dead setCC
284 // instructions. Until we have a motivating test case and fail to avoid
285 // it by changing other parts of LLVM's lowering, we refuse to handle
286 // this complex case here.
288 dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
289 CopyDefI.dump());
291 "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
292 }
293
294 auto Cleanup = make_scope_exit([&] {
295 // All uses of the EFLAGS copy are now rewritten, kill the copy into
296 // eflags and if dead the copy from.
297 CopyI->eraseFromParent();
298 if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
299 CopyDefI.eraseFromParent();
300 ++NumCopiesEliminated;
301 });
302
303 MachineOperand &DOp = CopyI->getOperand(0);
304 assert(DOp.isDef() && "Expected register def!");
305 assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
306 if (DOp.isDead())
307 continue;
308
309 MachineBasicBlock *TestMBB = CopyDefI.getParent();
310 auto TestPos = CopyDefI.getIterator();
311 DebugLoc TestLoc = CopyDefI.getDebugLoc();
312
313 LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());
314
315 // Walk up across live-in EFLAGS to find where they were actually def'ed.
316 //
317 // This copy's def may just be part of a region of blocks covered by
318 // a single def of EFLAGS and we want to find the top of that region where
319 // possible.
320 //
321 // This is essentially a search for a *candidate* reaching definition
322 // location. We don't need to ever find the actual reaching definition here,
323 // but we want to walk up the dominator tree to find the highest point which
324 // would be viable for such a definition.
325 auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
327 // Scan backwards as we expect these to be relatively short and often find
328 // a clobber near the end.
329 return llvm::any_of(
331 // Flag any instruction (other than the copy we are
332 // currently rewriting) that defs EFLAGS.
333 return &MI != CopyI &&
334 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
335 });
336 };
337 auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
338 MachineBasicBlock *EndMBB) {
339 assert(MDT->dominates(BeginMBB, EndMBB) &&
340 "Only support paths down the dominator tree!");
343 // We terminate at the beginning. No need to scan it.
344 Visited.insert(BeginMBB);
345 Worklist.push_back(EndMBB);
346 do {
347 auto *MBB = Worklist.pop_back_val();
348 for (auto *PredMBB : MBB->predecessors()) {
349 if (!Visited.insert(PredMBB).second)
350 continue;
351 if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
352 return true;
353 // Enqueue this block to walk its predecessors.
354 Worklist.push_back(PredMBB);
355 }
356 } while (!Worklist.empty());
357 // No clobber found along a path from the begin to end.
358 return false;
359 };
360 while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
361 !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
362 // Find the nearest common dominator of the predecessors, as
363 // that will be the best candidate to hoist into.
364 MachineBasicBlock *HoistMBB =
365 std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
366 *TestMBB->pred_begin(),
367 [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
368 return MDT->findNearestCommonDominator(LHS, RHS);
369 });
370
371 // Now we need to scan all predecessors that may be reached along paths to
372 // the hoist block. A clobber anywhere in any of these blocks prevents the hoist.
373 // Note that this even handles loops because we require *no* clobbers.
374 if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
375 break;
376
377 // We also need the terminators to not sneakily clobber flags.
378 if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
379 HoistMBB->instr_end()))
380 break;
381
382 // We found a viable location, hoist our test position to it.
383 TestMBB = HoistMBB;
384 TestPos = TestMBB->getFirstTerminator()->getIterator();
385 // Clear the debug location as it would just be confusing after hoisting.
386 TestLoc = DebugLoc();
387 }
388 LLVM_DEBUG({
389 auto DefIt = llvm::find_if(
390 llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
391 [&](MachineInstr &MI) {
392 return MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
393 });
394 if (DefIt.base() != TestMBB->instr_begin()) {
395 dbgs() << " Using EFLAGS defined by: ";
396 DefIt->dump();
397 } else {
398 dbgs() << " Using live-in flags for BB:\n";
399 TestMBB->dump();
400 }
401 });
402
403 // While rewriting uses, we buffer jumps and rewrite them in a second pass
404 // because doing so will perturb the CFG that we are walking to find the
405 // uses in the first place.
407
408 // Gather the condition flags that have already been preserved in
409 // registers. We do this from scratch each time as we expect there to be
410 // very few of them and we expect to not revisit the same copy definition
411 // many times. If either of those change sufficiently we could build a map
412 // of these up front instead.
413 CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);
414
415 // Collect the basic blocks we need to scan. Typically this will just be
416 // a single basic block but we may have to scan multiple blocks if the
417 // EFLAGS copy lives into successors.
420 Blocks.push_back(&MBB);
421
422 do {
423 MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
424
425 // Track if/when we find a kill of the flags in this block.
426 bool FlagsKilled = false;
427
428 // In most cases, we walk from the beginning to the end of the block. But
429 // when the block is the same block as the copy is from, we will visit it
430 // twice. The first time we start from the copy and go to the end. The
431 // second time we start from the beginning and go to the copy. This lets
432 // us handle copies inside of cycles.
433 // FIXME: This loop is *super* confusing. This is at least in part
434 // a symptom of all of this routine needing to be refactored into
435 // documentable components. Once done, there may be a better way to write
436 // this loop.
437 for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
438 ? std::next(CopyI->getIterator())
439 : UseMBB.instr_begin(),
440 MIE = UseMBB.instr_end();
441 MII != MIE;) {
442 MachineInstr &MI = *MII++;
443 // If we are in the original copy block and encounter either the copy
444 // def or the copy itself, break so that we don't re-process any part of
445 // the block or process the instructions in the range that was copied
446 // over.
447 if (&MI == CopyI || &MI == &CopyDefI) {
448 assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
449 "Should only encounter these on the second pass over the "
450 "original block.");
451 break;
452 }
453
454 MachineOperand *FlagUse =
455 MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr);
456 FlagsKilled = MI.modifiesRegister(X86::EFLAGS, TRI);
457
458 if (!FlagUse && FlagsKilled)
459 break;
460 else if (!FlagUse)
461 continue;
462
463 LLVM_DEBUG(dbgs() << " Rewriting use: "; MI.dump());
464
465 // Check the kill flag before we rewrite as that may change it.
466 if (FlagUse->isKill())
467 FlagsKilled = true;
468
469 // Once we encounter a branch, the rest of the instructions must also be
470 // branches. We can't rewrite in place here, so we handle them below.
471 //
472 // Note that we don't have to handle tail calls here, even conditional
473 // tail calls, as those are not introduced into the X86 MI until post-RA
474 // branch folding or block placement. As a consequence, we get to deal
475 // with the simpler formulation of conditional branches followed by tail
476 // calls.
478 auto JmpIt = MI.getIterator();
479 do {
480 JmpIs.push_back(&*JmpIt);
481 ++JmpIt;
482 } while (JmpIt != UseMBB.instr_end() &&
484 break;
485 }
486
487 // Otherwise we can just rewrite in-place.
488 unsigned Opc = MI.getOpcode();
489 if (Opc == TargetOpcode::COPY) {
490 // Just replace this copy with the original copy def.
491 MRI->replaceRegWith(MI.getOperand(0).getReg(),
492 CopyDefI.getOperand(0).getReg());
493 MI.eraseFromParent();
494 } else if (X86::isSETCC(Opc)) {
495 rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, CondRegs);
496 } else if (isArithmeticOp(Opc)) {
497 rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, CondRegs);
498 } else {
499 rewriteMI(*TestMBB, TestPos, TestLoc, MI, CondRegs);
500 }
501
502 // If this was the last use of the flags, we're done.
503 if (FlagsKilled)
504 break;
505 }
506
507 // If the flags were killed, we're done with this block.
508 if (FlagsKilled)
509 continue;
510
511 // Otherwise we need to scan successors for ones where the flags live-in
512 // and queue those up for processing.
513 for (MachineBasicBlock *SuccMBB : UseMBB.successors())
514 if (SuccMBB->isLiveIn(X86::EFLAGS) &&
515 VisitedBlocks.insert(SuccMBB).second) {
516 // We currently don't do any PHI insertion and so we require that the
517 // test basic block dominates all of the use basic blocks. Further, we
518 // can't have a cycle from the test block back to itself as that would
519 // create a cycle requiring a PHI to break it.
520 //
521 // We could in theory do PHI insertion here if it becomes useful by
522 // just taking undef values in along every edge that we don't trace
523 // this EFLAGS copy along. This isn't as bad as fully general PHI
524 // insertion, but still seems like a great deal of complexity.
525 //
526 // Because it is theoretically possible that some earlier MI pass or
527 // other lowering transformation could induce this to happen, we do
528 // a hard check even in non-debug builds here.
529 if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
530 LLVM_DEBUG({
531 dbgs()
532 << "ERROR: Encountered use that is not dominated by our test "
533 "basic block! Rewriting this would require inserting PHI "
534 "nodes to track the flag state across the CFG.\n\nTest "
535 "block:\n";
536 TestMBB->dump();
537 dbgs() << "Use block:\n";
538 SuccMBB->dump();
539 });
541 "Cannot lower EFLAGS copy when original copy def "
542 "does not dominate all uses.");
543 }
544
545 Blocks.push_back(SuccMBB);
546
547 // After this, EFLAGS will be recreated before each use.
548 SuccMBB->removeLiveIn(X86::EFLAGS);
549 }
550 } while (!Blocks.empty());
551
552 // Now rewrite the jumps that use the flags. These we handle specially
553 // because if there are multiple jumps in a single basic block we'll have
554 // to do surgery on the CFG.
555 MachineBasicBlock *LastJmpMBB = nullptr;
556 for (MachineInstr *JmpI : JmpIs) {
557 // Past the first jump within a basic block we need to split the blocks
558 // apart.
559 if (JmpI->getParent() == LastJmpMBB)
560 splitBlock(*JmpI->getParent(), *JmpI, *TII);
561 else
562 LastJmpMBB = JmpI->getParent();
563
564 rewriteMI(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
565 }
566
567 // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
568 // the copy's def operand is itself a kill.
569 }
570
571#ifndef NDEBUG
572 for (MachineBasicBlock &MBB : MF)
573 for (MachineInstr &MI : MBB)
574 if (MI.getOpcode() == TargetOpcode::COPY &&
575 (MI.getOperand(0).getReg() == X86::EFLAGS ||
576 MI.getOperand(1).getReg() == X86::EFLAGS)) {
577 LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
578 MI.dump());
579 llvm_unreachable("Unlowered EFLAGS copy!");
580 }
581#endif
582
583 return true;
584}
585
586/// Collect any conditions that have already been set in registers so that we
587/// can re-use them rather than adding duplicates.
588CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
590 CondRegArray CondRegs = {};
591
592 // Scan backwards across the range of instructions with live EFLAGS.
593 for (MachineInstr &MI :
596 if (Cond != X86::COND_INVALID && !MI.mayStore() &&
597 MI.getOperand(0).isReg() && MI.getOperand(0).getReg().isVirtual()) {
598 assert(MI.getOperand(0).isDef() &&
599 "A non-storing SETcc should always define a register!");
600 CondRegs[Cond] = MI.getOperand(0).getReg();
601 }
602
603 // Stop scanning when we see the first definition of the EFLAGS as prior to
604 // this we would potentially capture the wrong flag state.
605 if (MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr))
606 break;
607 }
608 return CondRegs;
609}
610
611Register X86FlagsCopyLoweringPass::promoteCondToReg(
613 const DebugLoc &TestLoc, X86::CondCode Cond) {
614 Register Reg = MRI->createVirtualRegister(PromoteRC);
615 auto SetI = BuildMI(TestMBB, TestPos, TestLoc, TII->get(X86::SETCCr), Reg)
616 .addImm(Cond);
617 (void)SetI;
618 LLVM_DEBUG(dbgs() << " save cond: "; SetI->dump());
619 ++NumSetCCsInserted;
620 return Reg;
621}
622
623std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
625 const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
626 unsigned &CondReg = CondRegs[Cond];
627 unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
628 if (!CondReg && !InvCondReg)
629 CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
630
631 if (CondReg)
632 return {CondReg, false};
633 else
634 return {InvCondReg, true};
635}
636
637void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
639 const DebugLoc &Loc, unsigned Reg) {
640 auto TestI =
641 BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
642 (void)TestI;
643 LLVM_DEBUG(dbgs() << " test cond: "; TestI->dump());
644 ++NumTestsInserted;
645}
646
647void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &MBB,
649 const DebugLoc &Loc,
651 CondRegArray &CondRegs) {
653 // Note that we can't usefully rewrite this to the inverse without complex
654 // analysis of the users of the setCC. Largely we rely on duplicates which
655 // could have been avoided already being avoided here.
656 unsigned &CondReg = CondRegs[Cond];
657 if (!CondReg)
658 CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);
659
660 // Rewriting a register def is trivial: we just replace the register and
661 // remove the setcc.
662 if (!MI.mayStore()) {
663 assert(MI.getOperand(0).isReg() &&
664 "Cannot have a non-register defined operand to SETcc!");
665 Register OldReg = MI.getOperand(0).getReg();
666 // Drop Kill flags on the old register before replacing. CondReg may have
667 // a longer live range.
668 MRI->clearKillFlags(OldReg);
669 MRI->replaceRegWith(OldReg, CondReg);
670 MI.eraseFromParent();
671 return;
672 }
673
674 // Otherwise, we need to emit a store.
675 auto MIB = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
676 TII->get(X86::MOV8mr));
677 // Copy the address operands.
678 for (int i = 0; i < X86::AddrNumOperands; ++i)
679 MIB.add(MI.getOperand(i));
680
681 MIB.addReg(CondReg);
682 MIB.setMemRefs(MI.memoperands());
683 MI.eraseFromParent();
684}
685
686void X86FlagsCopyLoweringPass::rewriteArithmetic(
688 const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs) {
689 // Arithmetic is either reading CF or OF.
690 X86::CondCode Cond = X86::COND_B; // CF == 1
691 // The addend to use to reset CF or OF when added to the flag value.
692 // Set up an addend that when one is added will need a carry due to not
693 // having a higher bit available.
694 int Addend = 255;
695
696 // Now get a register that contains the value of the flag input to the
697 // arithmetic. We require exactly this flag to simplify the arithmetic
698 // required to materialize it back into the flag.
699 unsigned &CondReg = CondRegs[Cond];
700 if (!CondReg)
701 CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);
702
703 // Insert an instruction that will set the flag back to the desired value.
704 Register TmpReg = MRI->createVirtualRegister(PromoteRC);
705 auto AddI =
706 BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
707 TII->get(Subtarget->hasNDD() ? X86::ADD8ri_ND : X86::ADD8ri))
708 .addDef(TmpReg, RegState::Dead)
709 .addReg(CondReg)
710 .addImm(Addend);
711 (void)AddI;
712 LLVM_DEBUG(dbgs() << " add cond: "; AddI->dump());
713 ++NumAddsInserted;
714 MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
715}
716
718#define FROM_TO(A, B) \
719 case X86::CMOV##A##_Fp32: \
720 case X86::CMOV##A##_Fp64: \
721 case X86::CMOV##A##_Fp80: \
722 return X86::COND_##B;
723
724 switch (Opc) {
725 default:
726 return X86::COND_INVALID;
727 FROM_TO(B, B)
728 FROM_TO(E, E)
729 FROM_TO(P, P)
730 FROM_TO(BE, BE)
731 FROM_TO(NB, AE)
732 FROM_TO(NE, NE)
733 FROM_TO(NP, NP)
734 FROM_TO(NBE, A)
735 }
736#undef FROM_TO
737}
738
739static unsigned getOpcodeWithCC(unsigned Opc, X86::CondCode CC) {
740 assert((CC == X86::COND_E || CC == X86::COND_NE) && "Unexpected CC");
741#define CASE(A) \
742 case X86::CMOVB_##A: \
743 case X86::CMOVE_##A: \
744 case X86::CMOVP_##A: \
745 case X86::CMOVBE_##A: \
746 case X86::CMOVNB_##A: \
747 case X86::CMOVNE_##A: \
748 case X86::CMOVNP_##A: \
749 case X86::CMOVNBE_##A: \
750 return (CC == X86::COND_E) ? X86::CMOVE_##A : X86::CMOVNE_##A;
751 switch (Opc) {
752 default:
753 llvm_unreachable("Unexpected opcode");
754 CASE(Fp32)
755 CASE(Fp64)
756 CASE(Fp80)
757 }
758#undef CASE
759}
760
761void X86FlagsCopyLoweringPass::rewriteMI(MachineBasicBlock &MBB,
763 const DebugLoc &Loc, MachineInstr &MI,
764 CondRegArray &CondRegs) {
765 // First get the register containing this specific condition.
766 bool IsImplicitCC = false;
768 if (CC == X86::COND_INVALID) {
769 CC = getImplicitCondFromMI(MI.getOpcode());
770 IsImplicitCC = true;
771 }
772 assert(CC != X86::COND_INVALID && "Unknown EFLAG user!");
773 unsigned CondReg;
774 bool Inverted;
775 std::tie(CondReg, Inverted) =
776 getCondOrInverseInReg(MBB, Pos, Loc, CC, CondRegs);
777
778 // Insert a direct test of the saved register.
779 insertTest(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), CondReg);
780
781 // Rewrite the instruction to use the !ZF flag from the test, and then kill
782 // its use of the flags afterward.
783 X86::CondCode NewCC = Inverted ? X86::COND_E : X86::COND_NE;
784 if (IsImplicitCC)
785 MI.setDesc(TII->get(getOpcodeWithCC(MI.getOpcode(), NewCC)));
786 else
787 MI.getOperand(MI.getDesc().getNumOperands() - 1).setImm(NewCC);
788
789 MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
790 LLVM_DEBUG(dbgs() << " fixed instruction: "; MI.dump());
791}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
static const HTTPClientCleanup Cleanup
Definition: HTTPClient.cpp:42
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
#define P(N)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Lower i1 Copies
This file contains some templates that are useful if you are working with the STL at all.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static X86::CondCode getImplicitCondFromMI(unsigned Opc)
#define FROM_TO(A, B)
static MachineBasicBlock & splitBlock(MachineBasicBlock &MBB, MachineInstr &SplitI, const X86InstrInfo &TII)
static unsigned getOpcodeWithCC(unsigned Opc, X86::CondCode CC)
#define CASE(A)
static bool isArithmeticOp(unsigned Opc)
#define DEBUG_TYPE
X86 EFLAGS copy lowering
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
void normalizeSuccProbs()
Normalize probabilities of all successors so that the sum of them becomes one.
instr_iterator instr_begin()
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void copySuccessor(const MachineBasicBlock *Orig, succ_iterator I)
Copy a successor (and any probability info) from original block to this block's.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:558
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:341
bool isBranch(QueryType Type=AnyInBundle) const
Returns true if this is a conditional, unconditional, or indirect branch.
Definition: MachineInstr.h:971
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:487
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
void dump() const
Definition: Pass.cpp:136
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:427
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Dead
Unused definition.
Reg
All possible values of the reg field in the ModR/M byte.
CondCode getCondFromBranch(const MachineInstr &MI)
CondCode getCondFromMI(const MachineInstr &MI)
Return the condition code of the instruction.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g. turning COND_E to COND_NE.
@ AddrNumOperands
Definition: X86BaseInfo.h:36
CondCode getCondFromSETCC(const MachineInstr &MI)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
FunctionPass * createX86FlagsCopyLoweringPass()
Return a pass that lowers EFLAGS copy pseudo instructions.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1824
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749