LLVM 20.0.0git
X86FlagsCopyLowering.cpp
Go to the documentation of this file.
1//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
11/// flag bits.
12///
13/// We have to do this by carefully analyzing and rewriting the usage of the
14/// copied EFLAGS register because there is no general way to rematerialize the
15/// entire EFLAGS register safely and efficiently. Using `popf` both forces
16/// dynamic stack adjustment and can create correctness issues due to IF, TF,
17/// and other non-status flags being overwritten. Using sequences involving
18/// SAHF don't work on all x86 processors and are often quite slow compared to
19/// directly testing a single status preserved in its own GPR.
20///
21//===----------------------------------------------------------------------===//
22
23#include "X86.h"
24#include "X86InstrBuilder.h"
25#include "X86InstrInfo.h"
26#include "X86Subtarget.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/ScopeExit.h"
33#include "llvm/ADT/Statistic.h"
49#include "llvm/IR/DebugLoc.h"
50#include "llvm/MC/MCSchedule.h"
51#include "llvm/Pass.h"
53#include "llvm/Support/Debug.h"
55#include <algorithm>
56#include <cassert>
57#include <iterator>
58#include <utility>
59
60using namespace llvm;
61
62#define PASS_KEY "x86-flags-copy-lowering"
63#define DEBUG_TYPE PASS_KEY
64
65STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
66STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
67STATISTIC(NumTestsInserted, "Number of test instructions inserted");
68STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
69STATISTIC(NumNFsConvertedTo, "Number of NF instructions converted to");
70
71namespace {
72
73// Convenient array type for storing registers associated with each condition.
74using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;
75
76class X86FlagsCopyLoweringPass : public MachineFunctionPass {
77public:
78 X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {}
79
80 StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
81 bool runOnMachineFunction(MachineFunction &MF) override;
82 void getAnalysisUsage(AnalysisUsage &AU) const override;
83
84 /// Pass identification, replacement for typeid.
85 static char ID;
86
87private:
88 MachineRegisterInfo *MRI = nullptr;
89 const X86Subtarget *Subtarget = nullptr;
90 const X86InstrInfo *TII = nullptr;
91 const TargetRegisterInfo *TRI = nullptr;
92 const TargetRegisterClass *PromoteRC = nullptr;
93 MachineDominatorTree *MDT = nullptr;
94
95 CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
97
98 Register promoteCondToReg(MachineBasicBlock &MBB,
100 const DebugLoc &TestLoc, X86::CondCode Cond);
101 std::pair<unsigned, bool> getCondOrInverseInReg(
103 const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs);
105 const DebugLoc &Loc, unsigned Reg);
106
107 void rewriteSetCC(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
108 const DebugLoc &Loc, MachineInstr &MI,
109 CondRegArray &CondRegs);
110 void rewriteArithmetic(MachineBasicBlock &MBB,
112 MachineInstr &MI, CondRegArray &CondRegs);
114 const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs);
115};
116
117} // end anonymous namespace
118
119INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
120 "X86 EFLAGS copy lowering", false, false)
121INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
122 "X86 EFLAGS copy lowering", false, false)
123
125 return new X86FlagsCopyLoweringPass();
126}
127
128char X86FlagsCopyLoweringPass::ID = 0;
129
130void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
133}
134
135static bool isArithmeticOp(unsigned Opc) {
136 return X86::isADC(Opc) || X86::isSBB(Opc) || X86::isRCL(Opc) ||
137 X86::isRCR(Opc) || (Opc == X86::SETB_C32r || Opc == X86::SETB_C64r);
138}
139
141 MachineInstr &SplitI,
142 const X86InstrInfo &TII) {
144
145 assert(SplitI.getParent() == &MBB &&
146 "Split instruction must be in the split block!");
147 assert(SplitI.isBranch() &&
148 "Only designed to split a tail of branch instructions!");
150 "Must split on an actual jCC instruction!");
151
152 // Dig out the previous instruction to the split point.
153 MachineInstr &PrevI = *std::prev(SplitI.getIterator());
154 assert(PrevI.isBranch() && "Must split after a branch!");
156 "Must split after an actual jCC instruction!");
157 assert(!std::prev(PrevI.getIterator())->isTerminator() &&
158 "Must only have this one terminator prior to the split!");
159
160 // Grab the one successor edge that will stay in `MBB`.
161 MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();
162
163 // Analyze the original block to see if we are actually splitting an edge
164 // into two edges. This can happen when we have multiple conditional jumps to
165 // the same successor.
166 bool IsEdgeSplit =
167 std::any_of(SplitI.getIterator(), MBB.instr_end(),
168 [&](MachineInstr &MI) {
169 assert(MI.isTerminator() &&
170 "Should only have spliced terminators!");
171 return llvm::any_of(
172 MI.operands(), [&](MachineOperand &MOp) {
173 return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
174 });
175 }) ||
176 MBB.getFallThrough() == &UnsplitSucc;
177
178 MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
179
180 // Insert the new block immediately after the current one. Any existing
181 // fallthrough will be sunk into this new block anyways.
182 MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
183
184 // Splice the tail of instructions into the new block.
185 NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());
186
187 // Copy the necessary succesors (and their probability info) into the new
188 // block.
189 for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
190 if (IsEdgeSplit || *SI != &UnsplitSucc)
191 NewMBB.copySuccessor(&MBB, SI);
192 // Normalize the probabilities if we didn't end up splitting the edge.
193 if (!IsEdgeSplit)
194 NewMBB.normalizeSuccProbs();
195
196 // Now replace all of the moved successors in the original block with the new
197 // block. This will merge their probabilities.
198 for (MachineBasicBlock *Succ : NewMBB.successors())
199 if (Succ != &UnsplitSucc)
200 MBB.replaceSuccessor(Succ, &NewMBB);
201
202 // We should always end up replacing at least one successor.
203 assert(MBB.isSuccessor(&NewMBB) &&
204 "Failed to make the new block a successor!");
205
206 // Now update all the PHIs.
207 for (MachineBasicBlock *Succ : NewMBB.successors()) {
208 for (MachineInstr &MI : *Succ) {
209 if (!MI.isPHI())
210 break;
211
212 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
213 OpIdx += 2) {
214 MachineOperand &OpV = MI.getOperand(OpIdx);
215 MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
216 assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
217 if (OpMBB.getMBB() != &MBB)
218 continue;
219
220 // Replace the operand for unsplit successors
221 if (!IsEdgeSplit || Succ != &UnsplitSucc) {
222 OpMBB.setMBB(&NewMBB);
223
224 // We have to continue scanning as there may be multiple entries in
225 // the PHI.
226 continue;
227 }
228
229 // When we have split the edge append a new successor.
230 MI.addOperand(MF, OpV);
231 MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
232 break;
233 }
234 }
235 }
236
237 return NewMBB;
238}
239
241
243 const MachineOperand *FlagDef =
244 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
245 if (!FlagDef)
246 return NoClobber;
247 if (FlagDef->isDead() && X86::getNFVariant(MI.getOpcode()))
248 return EvitableClobber;
249
250 return InevitableClobber;
251}
252
253bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
254 LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
255 << " **********\n");
256
257 Subtarget = &MF.getSubtarget<X86Subtarget>();
258 MRI = &MF.getRegInfo();
259 TII = Subtarget->getInstrInfo();
260 TRI = Subtarget->getRegisterInfo();
261 PromoteRC = &X86::GR8RegClass;
262
263 if (MF.empty())
264 // Nothing to do for a degenerate empty function...
265 return false;
266
267 if (none_of(MRI->def_instructions(X86::EFLAGS), [](const MachineInstr &MI) {
268 return MI.getOpcode() == TargetOpcode::COPY;
269 }))
270 return false;
271
272 // We change the code, so we don't preserve the dominator tree anyway. If we
273 // got a valid MDT from the pass manager, use that, otherwise construct one
274 // now. This is an optimization that avoids unnecessary MDT construction for
275 // functions that have no flag copies.
276
277 auto MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
278 std::unique_ptr<MachineDominatorTree> OwnedMDT;
279 if (MDTWrapper) {
280 MDT = &MDTWrapper->getDomTree();
281 } else {
282 OwnedMDT = std::make_unique<MachineDominatorTree>();
283 OwnedMDT->getBase().recalculate(MF);
284 MDT = OwnedMDT.get();
285 }
286
287 // Collect the copies in RPO so that when there are chains where a copy is in
288 // turn copied again we visit the first one first. This ensures we can find
289 // viable locations for testing the original EFLAGS that dominate all the
290 // uses across complex CFGs.
293 for (MachineBasicBlock *MBB : RPOT)
294 for (MachineInstr &MI : *MBB)
295 if (MI.getOpcode() == TargetOpcode::COPY &&
296 MI.getOperand(0).getReg() == X86::EFLAGS)
297 Copies.insert(&MI);
298
299 // Try to elminate the copys by transform the instructions between copy and
300 // copydef to the NF (no flags update) variants, e.g.
301 //
302 // %1:gr64 = COPY $eflags
303 // OP1 implicit-def dead $eflags
304 // $eflags = COPY %1
305 // OP2 cc, implicit $eflags
306 //
307 // ->
308 //
309 // OP1_NF
310 // OP2 implicit $eflags
311 if (Subtarget->hasNF()) {
313 // CopyIIt may be invalidated by removing copies.
314 auto CopyIIt = Copies.begin(), CopyIEnd = Copies.end();
315 while (CopyIIt != CopyIEnd) {
316 auto NCopyIIt = std::next(CopyIIt);
317 SmallSetVector<MachineInstr *, 4> EvitableClobbers;
318 MachineInstr *CopyI = *CopyIIt;
319 MachineOperand &VOp = CopyI->getOperand(1);
320 MachineInstr *CopyDefI = MRI->getVRegDef(VOp.getReg());
321 MachineBasicBlock *CopyIMBB = CopyI->getParent();
322 MachineBasicBlock *CopyDefIMBB = CopyDefI->getParent();
323 // Walk all basic blocks reachable in depth-first iteration on the inverse
324 // CFG from CopyIMBB to CopyDefIMBB. These blocks are all the blocks that
325 // may be executed between the execution of CopyDefIMBB and CopyIMBB. On
326 // all execution paths, instructions from CopyDefI to CopyI (exclusive)
327 // has to be NF-convertible if it clobbers flags.
328 for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
329 ++BI) {
330 MachineBasicBlock *MBB = *BI;
331 for (auto I = (MBB != CopyDefIMBB)
332 ? MBB->begin()
333 : std::next(MachineBasicBlock::iterator(CopyDefI)),
334 E = (MBB != CopyIMBB) ? MBB->end()
336 I != E; ++I) {
337 MachineInstr &MI = *I;
338 EFLAGSClobber ClobberType = getClobberType(MI);
339 if (ClobberType == NoClobber)
340 continue;
341
342 if (ClobberType == InevitableClobber)
343 goto ProcessNextCopyI;
344
345 assert(ClobberType == EvitableClobber && "unexpected workflow");
346 EvitableClobbers.insert(&MI);
347 }
348 }
349 // Covert evitable clobbers into NF variants and remove the copyies.
350 RemovedCopies.insert(CopyI);
351 CopyI->eraseFromParent();
352 if (MRI->use_nodbg_empty(CopyDefI->getOperand(0).getReg())) {
353 RemovedCopies.insert(CopyDefI);
354 CopyDefI->eraseFromParent();
355 }
356 ++NumCopiesEliminated;
357 for (auto *Clobber : EvitableClobbers) {
358 unsigned NewOpc = X86::getNFVariant(Clobber->getOpcode());
359 assert(NewOpc && "evitable clobber must have a NF variant");
360 Clobber->setDesc(TII->get(NewOpc));
361 Clobber->removeOperand(
362 Clobber->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)
363 ->getOperandNo());
364 ++NumNFsConvertedTo;
365 }
366 // Update liveins for basic blocks in the path
367 for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
368 ++BI)
369 if (*BI != CopyDefIMBB)
370 BI->addLiveIn(X86::EFLAGS);
371 ProcessNextCopyI:
372 CopyIIt = NCopyIIt;
373 }
374 Copies.set_subtract(RemovedCopies);
375 }
376
377 // For the rest of copies that cannot be eliminated by NF transform, we use
378 // setcc to preserve the flags in GPR32 before OP1, and recheck its value
379 // before using the flags, e.g.
380 //
381 // %1:gr64 = COPY $eflags
382 // OP1 implicit-def dead $eflags
383 // $eflags = COPY %1
384 // OP2 cc, implicit $eflags
385 //
386 // ->
387 //
388 // %1:gr8 = SETCCr cc, implicit $eflags
389 // OP1 implicit-def dead $eflags
390 // TEST8rr %1, %1, implicit-def $eflags
391 // OP2 ne, implicit $eflags
392 for (MachineInstr *CopyI : Copies) {
393 MachineBasicBlock &MBB = *CopyI->getParent();
394
395 MachineOperand &VOp = CopyI->getOperand(1);
396 assert(VOp.isReg() &&
397 "The input to the copy for EFLAGS should always be a register!");
398 MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
399 if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
400 // FIXME: The big likely candidate here are PHI nodes. We could in theory
401 // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
402 // enough that it is probably better to change every other part of LLVM
403 // to avoid creating them. The issue is that once we have PHIs we won't
404 // know which original EFLAGS value we need to capture with our setCCs
405 // below. The end result will be computing a complete set of setCCs that
406 // we *might* want, computing them in every place where we copy *out* of
407 // EFLAGS and then doing SSA formation on all of them to insert necessary
408 // PHI nodes and consume those here. Then hoping that somehow we DCE the
409 // unnecessary ones. This DCE seems very unlikely to be successful and so
410 // we will almost certainly end up with a glut of dead setCC
411 // instructions. Until we have a motivating test case and fail to avoid
412 // it by changing other parts of LLVM's lowering, we refuse to handle
413 // this complex case here.
415 dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
416 CopyDefI.dump());
418 "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
419 }
420
421 auto Cleanup = make_scope_exit([&] {
422 // All uses of the EFLAGS copy are now rewritten, kill the copy into
423 // eflags and if dead the copy from.
424 CopyI->eraseFromParent();
425 if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
426 CopyDefI.eraseFromParent();
427 ++NumCopiesEliminated;
428 });
429
430 MachineOperand &DOp = CopyI->getOperand(0);
431 assert(DOp.isDef() && "Expected register def!");
432 assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
433 if (DOp.isDead())
434 continue;
435
436 MachineBasicBlock *TestMBB = CopyDefI.getParent();
437 auto TestPos = CopyDefI.getIterator();
438 DebugLoc TestLoc = CopyDefI.getDebugLoc();
439
440 LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());
441
442 // Walk up across live-in EFLAGS to find where they were actually def'ed.
443 //
444 // This copy's def may just be part of a region of blocks covered by
445 // a single def of EFLAGS and we want to find the top of that region where
446 // possible.
447 //
448 // This is essentially a search for a *candidate* reaching definition
449 // location. We don't need to ever find the actual reaching definition here,
450 // but we want to walk up the dominator tree to find the highest point which
451 // would be viable for such a definition.
452 auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
454 // Scan backwards as we expect these to be relatively short and often find
455 // a clobber near the end.
456 return llvm::any_of(
458 // Flag any instruction (other than the copy we are
459 // currently rewriting) that defs EFLAGS.
460 return &MI != CopyI &&
461 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
462 });
463 };
464 auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
465 MachineBasicBlock *EndMBB) {
466 assert(MDT->dominates(BeginMBB, EndMBB) &&
467 "Only support paths down the dominator tree!");
470 // We terminate at the beginning. No need to scan it.
471 Visited.insert(BeginMBB);
472 Worklist.push_back(EndMBB);
473 do {
474 auto *MBB = Worklist.pop_back_val();
475 for (auto *PredMBB : MBB->predecessors()) {
476 if (!Visited.insert(PredMBB).second)
477 continue;
478 if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
479 return true;
480 // Enqueue this block to walk its predecessors.
481 Worklist.push_back(PredMBB);
482 }
483 } while (!Worklist.empty());
484 // No clobber found along a path from the begin to end.
485 return false;
486 };
487 while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
488 !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
489 // Find the nearest common dominator of the predecessors, as
490 // that will be the best candidate to hoist into.
491 MachineBasicBlock *HoistMBB =
492 std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
493 *TestMBB->pred_begin(),
494 [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
495 return MDT->findNearestCommonDominator(LHS, RHS);
496 });
497
498 // Now we need to scan all predecessors that may be reached along paths to
499 // the hoist block. A clobber anywhere in any of these blocks the hoist.
500 // Note that this even handles loops because we require *no* clobbers.
501 if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
502 break;
503
504 // We also need the terminators to not sneakily clobber flags.
505 if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
506 HoistMBB->instr_end()))
507 break;
508
509 // We found a viable location, hoist our test position to it.
510 TestMBB = HoistMBB;
511 TestPos = TestMBB->getFirstTerminator()->getIterator();
512 // Clear the debug location as it would just be confusing after hoisting.
513 TestLoc = DebugLoc();
514 }
515 LLVM_DEBUG({
516 auto DefIt = llvm::find_if(
517 llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
518 [&](MachineInstr &MI) {
519 return MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
520 });
521 if (DefIt.base() != TestMBB->instr_begin()) {
522 dbgs() << " Using EFLAGS defined by: ";
523 DefIt->dump();
524 } else {
525 dbgs() << " Using live-in flags for BB:\n";
526 TestMBB->dump();
527 }
528 });
529
530 // While rewriting uses, we buffer jumps and rewrite them in a second pass
531 // because doing so will perturb the CFG that we are walking to find the
532 // uses in the first place.
534
535 // Gather the condition flags that have already been preserved in
536 // registers. We do this from scratch each time as we expect there to be
537 // very few of them and we expect to not revisit the same copy definition
538 // many times. If either of those change sufficiently we could build a map
539 // of these up front instead.
540 CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);
541
542 // Collect the basic blocks we need to scan. Typically this will just be
543 // a single basic block but we may have to scan multiple blocks if the
544 // EFLAGS copy lives into successors.
547 Blocks.push_back(&MBB);
548
549 do {
550 MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
551
552 // Track when if/when we find a kill of the flags in this block.
553 bool FlagsKilled = false;
554
555 // In most cases, we walk from the beginning to the end of the block. But
556 // when the block is the same block as the copy is from, we will visit it
557 // twice. The first time we start from the copy and go to the end. The
558 // second time we start from the beginning and go to the copy. This lets
559 // us handle copies inside of cycles.
560 // FIXME: This loop is *super* confusing. This is at least in part
561 // a symptom of all of this routine needing to be refactored into
562 // documentable components. Once done, there may be a better way to write
563 // this loop.
564 for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
565 ? std::next(CopyI->getIterator())
566 : UseMBB.instr_begin(),
567 MIE = UseMBB.instr_end();
568 MII != MIE;) {
569 MachineInstr &MI = *MII++;
570 // If we are in the original copy block and encounter either the copy
571 // def or the copy itself, break so that we don't re-process any part of
572 // the block or process the instructions in the range that was copied
573 // over.
574 if (&MI == CopyI || &MI == &CopyDefI) {
575 assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
576 "Should only encounter these on the second pass over the "
577 "original block.");
578 break;
579 }
580
581 MachineOperand *FlagUse =
582 MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr);
583 FlagsKilled = MI.modifiesRegister(X86::EFLAGS, TRI);
584
585 if (!FlagUse && FlagsKilled)
586 break;
587 else if (!FlagUse)
588 continue;
589
590 LLVM_DEBUG(dbgs() << " Rewriting use: "; MI.dump());
591
592 // Check the kill flag before we rewrite as that may change it.
593 if (FlagUse->isKill())
594 FlagsKilled = true;
595
596 // Once we encounter a branch, the rest of the instructions must also be
597 // branches. We can't rewrite in place here, so we handle them below.
598 //
599 // Note that we don't have to handle tail calls here, even conditional
600 // tail calls, as those are not introduced into the X86 MI until post-RA
601 // branch folding or black placement. As a consequence, we get to deal
602 // with the simpler formulation of conditional branches followed by tail
603 // calls.
605 auto JmpIt = MI.getIterator();
606 do {
607 JmpIs.push_back(&*JmpIt);
608 ++JmpIt;
609 } while (JmpIt != UseMBB.instr_end() &&
611 break;
612 }
613
614 // Otherwise we can just rewrite in-place.
615 unsigned Opc = MI.getOpcode();
616 if (Opc == TargetOpcode::COPY) {
617 // Just replace this copy with the original copy def.
618 MRI->replaceRegWith(MI.getOperand(0).getReg(),
619 CopyDefI.getOperand(0).getReg());
620 MI.eraseFromParent();
621 } else if (X86::isSETCC(Opc)) {
622 rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, CondRegs);
623 } else if (isArithmeticOp(Opc)) {
624 rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, CondRegs);
625 } else {
626 rewriteMI(*TestMBB, TestPos, TestLoc, MI, CondRegs);
627 }
628
629 // If this was the last use of the flags, we're done.
630 if (FlagsKilled)
631 break;
632 }
633
634 // If the flags were killed, we're done with this block.
635 if (FlagsKilled)
636 continue;
637
638 // Otherwise we need to scan successors for ones where the flags live-in
639 // and queue those up for processing.
640 for (MachineBasicBlock *SuccMBB : UseMBB.successors())
641 if (SuccMBB->isLiveIn(X86::EFLAGS) &&
642 VisitedBlocks.insert(SuccMBB).second) {
643 // We currently don't do any PHI insertion and so we require that the
644 // test basic block dominates all of the use basic blocks. Further, we
645 // can't have a cycle from the test block back to itself as that would
646 // create a cycle requiring a PHI to break it.
647 //
648 // We could in theory do PHI insertion here if it becomes useful by
649 // just taking undef values in along every edge that we don't trace
650 // this EFLAGS copy along. This isn't as bad as fully general PHI
651 // insertion, but still seems like a great deal of complexity.
652 //
653 // Because it is theoretically possible that some earlier MI pass or
654 // other lowering transformation could induce this to happen, we do
655 // a hard check even in non-debug builds here.
656 if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
657 LLVM_DEBUG({
658 dbgs()
659 << "ERROR: Encountered use that is not dominated by our test "
660 "basic block! Rewriting this would require inserting PHI "
661 "nodes to track the flag state across the CFG.\n\nTest "
662 "block:\n";
663 TestMBB->dump();
664 dbgs() << "Use block:\n";
665 SuccMBB->dump();
666 });
668 "Cannot lower EFLAGS copy when original copy def "
669 "does not dominate all uses.");
670 }
671
672 Blocks.push_back(SuccMBB);
673
674 // After this, EFLAGS will be recreated before each use.
675 SuccMBB->removeLiveIn(X86::EFLAGS);
676 }
677 } while (!Blocks.empty());
678
679 // Now rewrite the jumps that use the flags. These we handle specially
680 // because if there are multiple jumps in a single basic block we'll have
681 // to do surgery on the CFG.
682 MachineBasicBlock *LastJmpMBB = nullptr;
683 for (MachineInstr *JmpI : JmpIs) {
684 // Past the first jump within a basic block we need to split the blocks
685 // apart.
686 if (JmpI->getParent() == LastJmpMBB)
687 splitBlock(*JmpI->getParent(), *JmpI, *TII);
688 else
689 LastJmpMBB = JmpI->getParent();
690
691 rewriteMI(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
692 }
693
694 // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
695 // the copy's def operand is itself a kill.
696 }
697
698#ifndef NDEBUG
699 for (MachineBasicBlock &MBB : MF)
700 for (MachineInstr &MI : MBB)
701 if (MI.getOpcode() == TargetOpcode::COPY &&
702 (MI.getOperand(0).getReg() == X86::EFLAGS ||
703 MI.getOperand(1).getReg() == X86::EFLAGS)) {
704 LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
705 MI.dump());
706 llvm_unreachable("Unlowered EFLAGS copy!");
707 }
708#endif
709
710 return true;
711}
712
713/// Collect any conditions that have already been set in registers so that we
714/// can re-use them rather than adding duplicates.
715CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
717 CondRegArray CondRegs = {};
718
719 // Scan backwards across the range of instructions with live EFLAGS.
720 for (MachineInstr &MI :
723 if (Cond != X86::COND_INVALID && !MI.mayStore() &&
724 MI.getOperand(0).isReg() && MI.getOperand(0).getReg().isVirtual()) {
725 assert(MI.getOperand(0).isDef() &&
726 "A non-storing SETcc should always define a register!");
727 CondRegs[Cond] = MI.getOperand(0).getReg();
728 }
729
730 // Stop scanning when we see the first definition of the EFLAGS as prior to
731 // this we would potentially capture the wrong flag state.
732 if (MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr))
733 break;
734 }
735 return CondRegs;
736}
737
738Register X86FlagsCopyLoweringPass::promoteCondToReg(
740 const DebugLoc &TestLoc, X86::CondCode Cond) {
741 Register Reg = MRI->createVirtualRegister(PromoteRC);
742 auto SetI = BuildMI(TestMBB, TestPos, TestLoc, TII->get(X86::SETCCr), Reg)
743 .addImm(Cond);
744 (void)SetI;
745 LLVM_DEBUG(dbgs() << " save cond: "; SetI->dump());
746 ++NumSetCCsInserted;
747 return Reg;
748}
749
750std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
752 const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
753 unsigned &CondReg = CondRegs[Cond];
754 unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
755 if (!CondReg && !InvCondReg)
756 CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
757
758 if (CondReg)
759 return {CondReg, false};
760 else
761 return {InvCondReg, true};
762}
763
764void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
766 const DebugLoc &Loc, unsigned Reg) {
767 auto TestI =
768 BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
769 (void)TestI;
770 LLVM_DEBUG(dbgs() << " test cond: "; TestI->dump());
771 ++NumTestsInserted;
772}
773
774void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &MBB,
776 const DebugLoc &Loc,
778 CondRegArray &CondRegs) {
780 // Note that we can't usefully rewrite this to the inverse without complex
781 // analysis of the users of the setCC. Largely we rely on duplicates which
782 // could have been avoided already being avoided here.
783 unsigned &CondReg = CondRegs[Cond];
784 if (!CondReg)
785 CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);
786
787 // Rewriting a register def is trivial: we just replace the register and
788 // remove the setcc.
789 if (!MI.mayStore()) {
790 assert(MI.getOperand(0).isReg() &&
791 "Cannot have a non-register defined operand to SETcc!");
792 Register OldReg = MI.getOperand(0).getReg();
793 // Drop Kill flags on the old register before replacing. CondReg may have
794 // a longer live range.
795 MRI->clearKillFlags(OldReg);
796 MRI->replaceRegWith(OldReg, CondReg);
797 MI.eraseFromParent();
798 return;
799 }
800
801 // Otherwise, we need to emit a store.
802 auto MIB = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
803 TII->get(X86::MOV8mr));
804 // Copy the address operands.
805 for (int i = 0; i < X86::AddrNumOperands; ++i)
806 MIB.add(MI.getOperand(i));
807
808 MIB.addReg(CondReg);
809 MIB.setMemRefs(MI.memoperands());
810 MI.eraseFromParent();
811}
812
813void X86FlagsCopyLoweringPass::rewriteArithmetic(
815 const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs) {
816 // Arithmetic is either reading CF or OF.
817 X86::CondCode Cond = X86::COND_B; // CF == 1
818 // The addend to use to reset CF or OF when added to the flag value.
819 // Set up an addend that when one is added will need a carry due to not
820 // having a higher bit available.
821 int Addend = 255;
822
823 // Now get a register that contains the value of the flag input to the
824 // arithmetic. We require exactly this flag to simplify the arithmetic
825 // required to materialize it back into the flag.
826 unsigned &CondReg = CondRegs[Cond];
827 if (!CondReg)
828 CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);
829
830 // Insert an instruction that will set the flag back to the desired value.
831 Register TmpReg = MRI->createVirtualRegister(PromoteRC);
832 auto AddI =
833 BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
834 TII->get(Subtarget->hasNDD() ? X86::ADD8ri_ND : X86::ADD8ri))
835 .addDef(TmpReg, RegState::Dead)
836 .addReg(CondReg)
837 .addImm(Addend);
838 (void)AddI;
839 LLVM_DEBUG(dbgs() << " add cond: "; AddI->dump());
840 ++NumAddsInserted;
841 MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
842}
843
845#define FROM_TO(A, B) \
846 case X86::CMOV##A##_Fp32: \
847 case X86::CMOV##A##_Fp64: \
848 case X86::CMOV##A##_Fp80: \
849 return X86::COND_##B;
850
851 switch (Opc) {
852 default:
853 return X86::COND_INVALID;
854 FROM_TO(B, B)
855 FROM_TO(E, E)
856 FROM_TO(P, P)
857 FROM_TO(BE, BE)
858 FROM_TO(NB, AE)
859 FROM_TO(NE, NE)
860 FROM_TO(NP, NP)
861 FROM_TO(NBE, A)
862 }
863#undef FROM_TO
864}
865
866static unsigned getOpcodeWithCC(unsigned Opc, X86::CondCode CC) {
867 assert((CC == X86::COND_E || CC == X86::COND_NE) && "Unexpected CC");
868#define CASE(A) \
869 case X86::CMOVB_##A: \
870 case X86::CMOVE_##A: \
871 case X86::CMOVP_##A: \
872 case X86::CMOVBE_##A: \
873 case X86::CMOVNB_##A: \
874 case X86::CMOVNE_##A: \
875 case X86::CMOVNP_##A: \
876 case X86::CMOVNBE_##A: \
877 return (CC == X86::COND_E) ? X86::CMOVE_##A : X86::CMOVNE_##A;
878 switch (Opc) {
879 default:
880 llvm_unreachable("Unexpected opcode");
881 CASE(Fp32)
882 CASE(Fp64)
883 CASE(Fp80)
884 }
885#undef CASE
886}
887
888void X86FlagsCopyLoweringPass::rewriteMI(MachineBasicBlock &MBB,
890 const DebugLoc &Loc, MachineInstr &MI,
891 CondRegArray &CondRegs) {
892 // First get the register containing this specific condition.
893 bool IsImplicitCC = false;
895 if (CC == X86::COND_INVALID) {
896 CC = getImplicitCondFromMI(MI.getOpcode());
897 IsImplicitCC = true;
898 }
899 assert(CC != X86::COND_INVALID && "Unknown EFLAG user!");
900 unsigned CondReg;
901 bool Inverted;
902 std::tie(CondReg, Inverted) =
903 getCondOrInverseInReg(MBB, Pos, Loc, CC, CondRegs);
904
905 // Insert a direct test of the saved register.
906 insertTest(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), CondReg);
907
908 // Rewrite the instruction to use the !ZF flag from the test, and then kill
909 // its use of the flags afterward.
910 X86::CondCode NewCC = Inverted ? X86::COND_E : X86::COND_NE;
911 if (IsImplicitCC)
912 MI.setDesc(TII->get(getOpcodeWithCC(MI.getOpcode(), NewCC)));
913 else
914 MI.getOperand(MI.getDesc().getNumOperands() - 1).setImm(NewCC);
915
916 MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
917 LLVM_DEBUG(dbgs() << " fixed instruction: "; MI.dump());
918}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
static const HTTPClientCleanup Cleanup
Definition: HTTPClient.cpp:42
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
#define P(N)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Lower i1 Copies
This file contains some templates that are useful if you are working with the STL at all.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static X86::CondCode getImplicitCondFromMI(unsigned Opc)
@ InevitableClobber
@ EvitableClobber
#define FROM_TO(A, B)
static MachineBasicBlock & splitBlock(MachineBasicBlock &MBB, MachineInstr &SplitI, const X86InstrInfo &TII)
static unsigned getOpcodeWithCC(unsigned Opc, X86::CondCode CC)
#define CASE(A)
static bool isArithmeticOp(unsigned Opc)
#define DEBUG_TYPE
static EFLAGSClobber getClobberType(const MachineInstr &MI)
X86 EFLAGS copy lowering
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
void normalizeSuccProbs()
Normalize probabilities of all successors so that the sum of them becomes one.
instr_iterator instr_begin()
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void copySuccessor(const MachineBasicBlock *Orig, succ_iterator I)
Copy a successor (and any probability info) from original block to this block's.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
bool isBranch(QueryType Type=AnyInBundle) const
Returns true if this is a conditional, unconditional, or indirect branch.
Definition: MachineInstr.h:982
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void dump() const
Definition: Pass.cpp:136
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:436
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:95
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Dead
Unused definition.
Reg
All possible values of the reg field in the ModR/M byte.
CondCode getCondFromBranch(const MachineInstr &MI)
CondCode getCondFromMI(const MachineInstr &MI)
Return the condition code of the instruction.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g.
@ AddrNumOperands
Definition: X86BaseInfo.h:36
CondCode getCondFromSETCC(const MachineInstr &MI)
unsigned getNFVariant(unsigned Opc)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
FunctionPass * createX86FlagsCopyLoweringPass()
Return a pass that lowers EFLAGS copy pseudo instructions.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
idf_iterator< T > idf_end(const T &G)
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1824
idf_iterator< T > idf_begin(const T &G)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749