LLVM 18.0.0git
X86FloatingPoint.cpp
Go to the documentation of this file.
1//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the pass which converts floating point instructions from
10// pseudo registers into register stack instructions. This pass uses live
11// variable information to indicate where the FPn registers are used and their
12// lifetimes.
13//
14// The x87 hardware tracks liveness of the stack registers, so it is necessary
15// to implement exact liveness tracking between basic blocks. The CFG edges are
16// partitioned into bundles where the same FP registers must be live in
17// identical stack positions. Instructions are inserted at the end of each basic
18// block to rearrange the live registers to match the outgoing bundle.
19//
20// This approach avoids splitting critical edges at the potential cost of more
21// live register shuffling instructions when critical edges are present.
22//
23//===----------------------------------------------------------------------===//
24
25#include "X86.h"
26#include "X86InstrInfo.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
37#include "llvm/CodeGen/Passes.h"
40#include "llvm/Config/llvm-config.h"
41#include "llvm/IR/InlineAsm.h"
43#include "llvm/Support/Debug.h"
47#include <algorithm>
48#include <bitset>
49using namespace llvm;
50
51#define DEBUG_TYPE "x86-codegen"
52
53STATISTIC(NumFXCH, "Number of fxch instructions inserted");
54STATISTIC(NumFP , "Number of floating point instructions");
55
56namespace {
57 const unsigned ScratchFPReg = 7;
58
59 struct FPS : public MachineFunctionPass {
60 static char ID;
61 FPS() : MachineFunctionPass(ID) {
62 // This is really only to keep valgrind quiet.
63 // The logic in isLive() is too much for it.
64 memset(Stack, 0, sizeof(Stack));
65 memset(RegMap, 0, sizeof(RegMap));
66 }
67
68 void getAnalysisUsage(AnalysisUsage &AU) const override {
69 AU.setPreservesCFG();
74 }
75
76 bool runOnMachineFunction(MachineFunction &MF) override;
77
80 MachineFunctionProperties::Property::NoVRegs);
81 }
82
83 StringRef getPassName() const override { return "X86 FP Stackifier"; }
84
85 private:
86 const TargetInstrInfo *TII = nullptr; // Machine instruction info.
87
88 // Two CFG edges are related if they leave the same block, or enter the same
89 // block. The transitive closure of an edge under this relation is a
90 // LiveBundle. It represents a set of CFG edges where the live FP stack
91 // registers must be allocated identically in the x87 stack.
92 //
93 // A LiveBundle is usually all the edges leaving a block, or all the edges
94 // entering a block, but it can contain more edges if critical edges are
95 // present.
96 //
97 // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
98 // but the exact mapping of FP registers to stack slots is fixed later.
99 struct LiveBundle {
100 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
101 unsigned Mask = 0;
102
103 // Number of pre-assigned live registers in FixStack. This is 0 when the
104 // stack order has not yet been fixed.
105 unsigned FixCount = 0;
106
107 // Assigned stack order for live-in registers.
108 // FixStack[i] == getStackEntry(i) for all i < FixCount.
109 unsigned char FixStack[8];
110
111 LiveBundle() = default;
112
113 // Have the live registers been assigned a stack order yet?
114 bool isFixed() const { return !Mask || FixCount; }
115 };
116
117 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
118 // with no live FP registers.
119 SmallVector<LiveBundle, 8> LiveBundles;
120
121 // The edge bundle analysis provides indices into the LiveBundles vector.
122 EdgeBundles *Bundles = nullptr;
123
124 // Return a bitmask of FP registers in block's live-in list.
125 static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) {
126 unsigned Mask = 0;
128 I != MBB->livein_end(); ) {
129 MCPhysReg Reg = I->PhysReg;
130 static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums");
131 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
132 Mask |= 1 << (Reg - X86::FP0);
133 if (RemoveFPs) {
134 I = MBB->removeLiveIn(I);
135 continue;
136 }
137 }
138 ++I;
139 }
140 return Mask;
141 }
142
143 // Partition all the CFG edges into LiveBundles.
144 void bundleCFGRecomputeKillFlags(MachineFunction &MF);
145
146 MachineBasicBlock *MBB = nullptr; // Current basic block
147
148 // The hardware keeps track of how many FP registers are live, so we have
149 // to model that exactly. Usually, each live register corresponds to an
150 // FP<n> register, but when dealing with calls, returns, and inline
151 // assembly, it is sometimes necessary to have live scratch registers.
152 unsigned Stack[8]; // FP<n> Registers in each stack slot...
153 unsigned StackTop = 0; // The current top of the FP stack.
154
155 enum {
156 NumFPRegs = 8 // Including scratch pseudo-registers.
157 };
158
159 // For each live FP<n> register, point to its Stack[] entry.
160 // The first entries correspond to FP0-FP6, the rest are scratch registers
161 // used when we need slightly different live registers than what the
162 // register allocator thinks.
163 unsigned RegMap[NumFPRegs];
164
165 // Set up our stack model to match the incoming registers to MBB.
166 void setupBlockStack();
167
168 // Shuffle live registers to match the expectations of successor blocks.
169 void finishBlockStack();
170
171#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
172 void dumpStack() const {
173 dbgs() << "Stack contents:";
174 for (unsigned i = 0; i != StackTop; ++i) {
175 dbgs() << " FP" << Stack[i];
176 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
177 }
178 }
179#endif
180
181 /// getSlot - Return the stack slot number a particular register number is
182 /// in.
183 unsigned getSlot(unsigned RegNo) const {
184 assert(RegNo < NumFPRegs && "Regno out of range!");
185 return RegMap[RegNo];
186 }
187
188 /// isLive - Is RegNo currently live in the stack?
189 bool isLive(unsigned RegNo) const {
190 unsigned Slot = getSlot(RegNo);
191 return Slot < StackTop && Stack[Slot] == RegNo;
192 }
193
194 /// getStackEntry - Return the X86::FP<n> register in register ST(i).
195 unsigned getStackEntry(unsigned STi) const {
196 if (STi >= StackTop)
197 report_fatal_error("Access past stack top!");
198 return Stack[StackTop-1-STi];
199 }
200
201 /// getSTReg - Return the X86::ST(i) register which contains the specified
202 /// FP<RegNo> register.
203 unsigned getSTReg(unsigned RegNo) const {
204 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
205 }
206
207 // pushReg - Push the specified FP<n> register onto the stack.
208 void pushReg(unsigned Reg) {
209 assert(Reg < NumFPRegs && "Register number out of range!");
210 if (StackTop >= 8)
211 report_fatal_error("Stack overflow!");
212 Stack[StackTop] = Reg;
213 RegMap[Reg] = StackTop++;
214 }
215
216 // popReg - Pop a register from the stack.
217 void popReg() {
218 if (StackTop == 0)
219 report_fatal_error("Cannot pop empty stack!");
220 RegMap[Stack[--StackTop]] = ~0; // Update state
221 }
222
223 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
224 void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
225 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
226 if (isAtTop(RegNo)) return;
227
228 unsigned STReg = getSTReg(RegNo);
229 unsigned RegOnTop = getStackEntry(0);
230
231 // Swap the slots the regs are in.
232 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
233
234 // Swap stack slot contents.
235 if (RegMap[RegOnTop] >= StackTop)
236 report_fatal_error("Access past stack top!");
237 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
238
239 // Emit an fxch to update the runtime processors version of the state.
240 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
241 ++NumFXCH;
242 }
243
244 void duplicateToTop(unsigned RegNo, unsigned AsReg,
246 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
247 unsigned STReg = getSTReg(RegNo);
248 pushReg(AsReg); // New register on top of stack
249
250 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
251 }
252
253 /// popStackAfter - Pop the current value off of the top of the FP stack
254 /// after the specified instruction.
255 void popStackAfter(MachineBasicBlock::iterator &I);
256
257 /// freeStackSlotAfter - Free the specified register from the register
258 /// stack, so that it is no longer in a register. If the register is
259 /// currently at the top of the stack, we just pop the current instruction,
260 /// otherwise we store the current top-of-stack into the specified slot,
261 /// then pop the top of stack.
262 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
263
264 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
265 /// instruction.
267 freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
268
269 /// Adjust the live registers to be the set in Mask.
270 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
271
272 /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
273 /// st(0), FP reg FixStack[1] is st(1) etc.
274 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
276
277 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
278
279 void handleCall(MachineBasicBlock::iterator &I);
280 void handleReturn(MachineBasicBlock::iterator &I);
281 void handleZeroArgFP(MachineBasicBlock::iterator &I);
282 void handleOneArgFP(MachineBasicBlock::iterator &I);
283 void handleOneArgFPRW(MachineBasicBlock::iterator &I);
284 void handleTwoArgFP(MachineBasicBlock::iterator &I);
285 void handleCompareFP(MachineBasicBlock::iterator &I);
286 void handleCondMovFP(MachineBasicBlock::iterator &I);
287 void handleSpecialFP(MachineBasicBlock::iterator &I);
288
289 // Check if a COPY instruction is using FP registers.
290 static bool isFPCopy(MachineInstr &MI) {
291 Register DstReg = MI.getOperand(0).getReg();
292 Register SrcReg = MI.getOperand(1).getReg();
293
294 return X86::RFP80RegClass.contains(DstReg) ||
295 X86::RFP80RegClass.contains(SrcReg);
296 }
297
298 void setKillFlags(MachineBasicBlock &MBB) const;
299 };
300}
301
302char FPS::ID = 0;
303
304INITIALIZE_PASS_BEGIN(FPS, DEBUG_TYPE, "X86 FP Stackifier",
305 false, false)
309
311
312/// getFPReg - Return the X86::FPx register number for the specified operand.
313/// For example, this returns 3 for X86::FP3.
314static unsigned getFPReg(const MachineOperand &MO) {
315 assert(MO.isReg() && "Expected an FP register!");
316 Register Reg = MO.getReg();
317 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
318 return Reg - X86::FP0;
319}
320
321/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
322/// register references into FP stack references.
323///
324bool FPS::runOnMachineFunction(MachineFunction &MF) {
325 // We only need to run this pass if there are any FP registers used in this
326 // function. If it is all integer, there is nothing for us to do!
327 bool FPIsUsed = false;
328
329 static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
330 const MachineRegisterInfo &MRI = MF.getRegInfo();
331 for (unsigned i = 0; i <= 6; ++i)
332 if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
333 FPIsUsed = true;
334 break;
335 }
336
337 // Early exit.
338 if (!FPIsUsed) return false;
339
340 Bundles = &getAnalysis<EdgeBundles>();
342
343 // Prepare cross-MBB liveness.
344 bundleCFGRecomputeKillFlags(MF);
345
346 StackTop = 0;
347
348 // Process the function in depth first order so that we process at least one
349 // of the predecessors for every reachable block in the function.
351 MachineBasicBlock *Entry = &MF.front();
352
353 LiveBundle &Bundle =
354 LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
355
356 // In regcall convention, some FP registers may not be passed through
357 // the stack, so they will need to be assigned to the stack first
358 if ((Entry->getParent()->getFunction().getCallingConv() ==
359 CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
360 // In the register calling convention, up to one FP argument could be
361 // saved in the first FP register.
362 // If bundle.mask is non-zero and Bundle.FixCount is zero, it means
363 // that the FP registers contain arguments.
364 // The actual value is passed in FP0.
365 // Here we fix the stack and mark FP0 as pre-assigned register.
366 assert((Bundle.Mask & 0xFE) == 0 &&
367 "Only FP0 could be passed as an argument");
368 Bundle.FixCount = 1;
369 Bundle.FixStack[0] = 0;
370 }
371
372 bool Changed = false;
373 for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
374 Changed |= processBasicBlock(MF, *BB);
375
376 // Process any unreachable blocks in arbitrary order now.
377 if (MF.size() != Processed.size())
378 for (MachineBasicBlock &BB : MF)
379 if (Processed.insert(&BB).second)
380 Changed |= processBasicBlock(MF, BB);
381
382 LiveBundles.clear();
383
384 return Changed;
385}
386
387/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
388/// live-out sets for the FP registers. Consistent means that the set of
389/// registers live-out from a block is identical to the live-in set of all
390/// successors. This is not enforced by the normal live-in lists since
391/// registers may be implicitly defined, or not used by all successors.
392void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) {
393 assert(LiveBundles.empty() && "Stale data in LiveBundles");
394 LiveBundles.resize(Bundles->getNumBundles());
395
396 // Gather the actual live-in masks for all MBBs.
397 for (MachineBasicBlock &MBB : MF) {
398 setKillFlags(MBB);
399
400 const unsigned Mask = calcLiveInMask(&MBB, false);
401 if (!Mask)
402 continue;
403 // Update MBB ingoing bundle mask.
404 LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask;
405 }
406}
407
408/// processBasicBlock - Loop over all of the instructions in the basic block,
409/// transforming FP instructions into their stack form.
410///
411bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
412 bool Changed = false;
413 MBB = &BB;
414
415 setupBlockStack();
416
417 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
418 MachineInstr &MI = *I;
419 uint64_t Flags = MI.getDesc().TSFlags;
420
421 unsigned FPInstClass = Flags & X86II::FPTypeMask;
422 if (MI.isInlineAsm())
423 FPInstClass = X86II::SpecialFP;
424
425 if (MI.isCopy() && isFPCopy(MI))
426 FPInstClass = X86II::SpecialFP;
427
428 if (MI.isImplicitDef() &&
429 X86::RFP80RegClass.contains(MI.getOperand(0).getReg()))
430 FPInstClass = X86II::SpecialFP;
431
432 if (MI.isCall())
433 FPInstClass = X86II::SpecialFP;
434
435 if (FPInstClass == X86II::NotFP)
436 continue; // Efficiently ignore non-fp insts!
437
438 MachineInstr *PrevMI = nullptr;
439 if (I != BB.begin())
440 PrevMI = &*std::prev(I);
441
442 ++NumFP; // Keep track of # of pseudo instrs
443 LLVM_DEBUG(dbgs() << "\nFPInst:\t" << MI);
444
445 // Get dead variables list now because the MI pointer may be deleted as part
446 // of processing!
448 for (const MachineOperand &MO : MI.operands())
449 if (MO.isReg() && MO.isDead())
450 DeadRegs.push_back(MO.getReg());
451
452 switch (FPInstClass) {
453 case X86II::ZeroArgFP: handleZeroArgFP(I); break;
454 case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
455 case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
456 case X86II::TwoArgFP: handleTwoArgFP(I); break;
457 case X86II::CompareFP: handleCompareFP(I); break;
458 case X86II::CondMovFP: handleCondMovFP(I); break;
459 case X86II::SpecialFP: handleSpecialFP(I); break;
460 default: llvm_unreachable("Unknown FP Type!");
461 }
462
463 // Check to see if any of the values defined by this instruction are dead
464 // after definition. If so, pop them.
465 for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
466 unsigned Reg = DeadRegs[i];
467 // Check if Reg is live on the stack. An inline-asm register operand that
468 // is in the clobber list and marked dead might not be live on the stack.
469 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
470 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
471 LLVM_DEBUG(dbgs() << "Register FP#" << Reg - X86::FP0 << " is dead!\n");
472 freeStackSlotAfter(I, Reg-X86::FP0);
473 }
474 }
475
476 // Print out all of the instructions expanded to if -debug
477 LLVM_DEBUG({
478 MachineBasicBlock::iterator PrevI = PrevMI;
479 if (I == PrevI) {
480 dbgs() << "Just deleted pseudo instruction\n";
481 } else {
483 // Rewind to first instruction newly inserted.
484 while (Start != BB.begin() && std::prev(Start) != PrevI)
485 --Start;
486 dbgs() << "Inserted instructions:\n\t";
487 Start->print(dbgs());
488 while (++Start != std::next(I)) {
489 }
490 }
491 dumpStack();
492 });
493 (void)PrevMI;
494
495 Changed = true;
496 }
497
498 finishBlockStack();
499
500 return Changed;
501}
502
503/// setupBlockStack - Use the live bundles to set up our model of the stack
504/// to match predecessors' live out stack.
505void FPS::setupBlockStack() {
506 LLVM_DEBUG(dbgs() << "\nSetting up live-ins for " << printMBBReference(*MBB)
507 << " derived from " << MBB->getName() << ".\n");
508 StackTop = 0;
509 // Get the live-in bundle for MBB.
510 const LiveBundle &Bundle =
511 LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
512
513 if (!Bundle.Mask) {
514 LLVM_DEBUG(dbgs() << "Block has no FP live-ins.\n");
515 return;
516 }
517
518 // Depth-first iteration should ensure that we always have an assigned stack.
519 assert(Bundle.isFixed() && "Reached block before any predecessors");
520
521 // Push the fixed live-in registers.
522 for (unsigned i = Bundle.FixCount; i > 0; --i) {
523 LLVM_DEBUG(dbgs() << "Live-in st(" << (i - 1) << "): %fp"
524 << unsigned(Bundle.FixStack[i - 1]) << '\n');
525 pushReg(Bundle.FixStack[i-1]);
526 }
527
528 // Kill off unwanted live-ins. This can happen with a critical edge.
529 // FIXME: We could keep these live registers around as zombies. They may need
530 // to be revived at the end of a short block. It might save a few instrs.
531 unsigned Mask = calcLiveInMask(MBB, /*RemoveFPs=*/true);
532 adjustLiveRegs(Mask, MBB->begin());
533 LLVM_DEBUG(MBB->dump());
534}
535
536/// finishBlockStack - Revive live-outs that are implicitly defined out of
537/// MBB. Shuffle live registers to match the expected fixed stack of any
538/// predecessors, and ensure that all predecessors are expecting the same
539/// stack.
540void FPS::finishBlockStack() {
541 // The RET handling below takes care of return blocks for us.
542 if (MBB->succ_empty())
543 return;
544
545 LLVM_DEBUG(dbgs() << "Setting up live-outs for " << printMBBReference(*MBB)
546 << " derived from " << MBB->getName() << ".\n");
547
548 // Get MBB's live-out bundle.
549 unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
550 LiveBundle &Bundle = LiveBundles[BundleIdx];
551
552 // We may need to kill and define some registers to match successors.
553 // FIXME: This can probably be combined with the shuffle below.
555 adjustLiveRegs(Bundle.Mask, Term);
556
557 if (!Bundle.Mask) {
558 LLVM_DEBUG(dbgs() << "No live-outs.\n");
559 return;
560 }
561
562 // Has the stack order been fixed yet?
563 LLVM_DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
564 if (Bundle.isFixed()) {
565 LLVM_DEBUG(dbgs() << "Shuffling stack to match.\n");
566 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
567 } else {
568 // Not fixed yet, we get to choose.
569 LLVM_DEBUG(dbgs() << "Fixing stack order now.\n");
570 Bundle.FixCount = StackTop;
571 for (unsigned i = 0; i < StackTop; ++i)
572 Bundle.FixStack[i] = getStackEntry(i);
573 }
574}
575
576
577//===----------------------------------------------------------------------===//
578// Efficient Lookup Table Support
579//===----------------------------------------------------------------------===//
580
581namespace {
582 struct TableEntry {
583 uint16_t from;
584 uint16_t to;
585 bool operator<(const TableEntry &TE) const { return from < TE.from; }
586 friend bool operator<(const TableEntry &TE, unsigned V) {
587 return TE.from < V;
588 }
589 friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
590 const TableEntry &TE) {
591 return V < TE.from;
592 }
593 };
594}
595
596static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
597 const TableEntry *I = llvm::lower_bound(Table, Opcode);
598 if (I != Table.end() && I->from == Opcode)
599 return I->to;
600 return -1;
601}
602
603#ifdef NDEBUG
604#define ASSERT_SORTED(TABLE)
605#else
606#define ASSERT_SORTED(TABLE) \
607 { \
608 static std::atomic<bool> TABLE##Checked(false); \
609 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
610 assert(is_sorted(TABLE) && \
611 "All lookup tables must be sorted for efficient access!"); \
612 TABLE##Checked.store(true, std::memory_order_relaxed); \
613 } \
614 }
615#endif
616
617//===----------------------------------------------------------------------===//
618// Register File -> Register Stack Mapping Methods
619//===----------------------------------------------------------------------===//
620
621// OpcodeTable - Sorted map of register instructions to their stack version.
622// The first element is an register file pseudo instruction, the second is the
623// concrete X86 instruction which uses the register stack.
624//
625static const TableEntry OpcodeTable[] = {
626 { X86::ABS_Fp32 , X86::ABS_F },
627 { X86::ABS_Fp64 , X86::ABS_F },
628 { X86::ABS_Fp80 , X86::ABS_F },
629 { X86::ADD_Fp32m , X86::ADD_F32m },
630 { X86::ADD_Fp64m , X86::ADD_F64m },
631 { X86::ADD_Fp64m32 , X86::ADD_F32m },
632 { X86::ADD_Fp80m32 , X86::ADD_F32m },
633 { X86::ADD_Fp80m64 , X86::ADD_F64m },
634 { X86::ADD_FpI16m32 , X86::ADD_FI16m },
635 { X86::ADD_FpI16m64 , X86::ADD_FI16m },
636 { X86::ADD_FpI16m80 , X86::ADD_FI16m },
637 { X86::ADD_FpI32m32 , X86::ADD_FI32m },
638 { X86::ADD_FpI32m64 , X86::ADD_FI32m },
639 { X86::ADD_FpI32m80 , X86::ADD_FI32m },
640 { X86::CHS_Fp32 , X86::CHS_F },
641 { X86::CHS_Fp64 , X86::CHS_F },
642 { X86::CHS_Fp80 , X86::CHS_F },
643 { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
644 { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
645 { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
646 { X86::CMOVB_Fp32 , X86::CMOVB_F },
647 { X86::CMOVB_Fp64 , X86::CMOVB_F },
648 { X86::CMOVB_Fp80 , X86::CMOVB_F },
649 { X86::CMOVE_Fp32 , X86::CMOVE_F },
650 { X86::CMOVE_Fp64 , X86::CMOVE_F },
651 { X86::CMOVE_Fp80 , X86::CMOVE_F },
652 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
653 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
654 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
655 { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
656 { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
657 { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
658 { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
659 { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
660 { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
661 { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
662 { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
663 { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
664 { X86::CMOVP_Fp32 , X86::CMOVP_F },
665 { X86::CMOVP_Fp64 , X86::CMOVP_F },
666 { X86::CMOVP_Fp80 , X86::CMOVP_F },
667 { X86::COM_FpIr32 , X86::COM_FIr },
668 { X86::COM_FpIr64 , X86::COM_FIr },
669 { X86::COM_FpIr80 , X86::COM_FIr },
670 { X86::COM_Fpr32 , X86::COM_FST0r },
671 { X86::COM_Fpr64 , X86::COM_FST0r },
672 { X86::COM_Fpr80 , X86::COM_FST0r },
673 { X86::DIVR_Fp32m , X86::DIVR_F32m },
674 { X86::DIVR_Fp64m , X86::DIVR_F64m },
675 { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
676 { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
677 { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
678 { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
679 { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
680 { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
681 { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
682 { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
683 { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
684 { X86::DIV_Fp32m , X86::DIV_F32m },
685 { X86::DIV_Fp64m , X86::DIV_F64m },
686 { X86::DIV_Fp64m32 , X86::DIV_F32m },
687 { X86::DIV_Fp80m32 , X86::DIV_F32m },
688 { X86::DIV_Fp80m64 , X86::DIV_F64m },
689 { X86::DIV_FpI16m32 , X86::DIV_FI16m },
690 { X86::DIV_FpI16m64 , X86::DIV_FI16m },
691 { X86::DIV_FpI16m80 , X86::DIV_FI16m },
692 { X86::DIV_FpI32m32 , X86::DIV_FI32m },
693 { X86::DIV_FpI32m64 , X86::DIV_FI32m },
694 { X86::DIV_FpI32m80 , X86::DIV_FI32m },
695 { X86::ILD_Fp16m32 , X86::ILD_F16m },
696 { X86::ILD_Fp16m64 , X86::ILD_F16m },
697 { X86::ILD_Fp16m80 , X86::ILD_F16m },
698 { X86::ILD_Fp32m32 , X86::ILD_F32m },
699 { X86::ILD_Fp32m64 , X86::ILD_F32m },
700 { X86::ILD_Fp32m80 , X86::ILD_F32m },
701 { X86::ILD_Fp64m32 , X86::ILD_F64m },
702 { X86::ILD_Fp64m64 , X86::ILD_F64m },
703 { X86::ILD_Fp64m80 , X86::ILD_F64m },
704 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
705 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
706 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
707 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
708 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
709 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
710 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
711 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
712 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
713 { X86::IST_Fp16m32 , X86::IST_F16m },
714 { X86::IST_Fp16m64 , X86::IST_F16m },
715 { X86::IST_Fp16m80 , X86::IST_F16m },
716 { X86::IST_Fp32m32 , X86::IST_F32m },
717 { X86::IST_Fp32m64 , X86::IST_F32m },
718 { X86::IST_Fp32m80 , X86::IST_F32m },
719 { X86::IST_Fp64m32 , X86::IST_FP64m },
720 { X86::IST_Fp64m64 , X86::IST_FP64m },
721 { X86::IST_Fp64m80 , X86::IST_FP64m },
722 { X86::LD_Fp032 , X86::LD_F0 },
723 { X86::LD_Fp064 , X86::LD_F0 },
724 { X86::LD_Fp080 , X86::LD_F0 },
725 { X86::LD_Fp132 , X86::LD_F1 },
726 { X86::LD_Fp164 , X86::LD_F1 },
727 { X86::LD_Fp180 , X86::LD_F1 },
728 { X86::LD_Fp32m , X86::LD_F32m },
729 { X86::LD_Fp32m64 , X86::LD_F32m },
730 { X86::LD_Fp32m80 , X86::LD_F32m },
731 { X86::LD_Fp64m , X86::LD_F64m },
732 { X86::LD_Fp64m80 , X86::LD_F64m },
733 { X86::LD_Fp80m , X86::LD_F80m },
734 { X86::MUL_Fp32m , X86::MUL_F32m },
735 { X86::MUL_Fp64m , X86::MUL_F64m },
736 { X86::MUL_Fp64m32 , X86::MUL_F32m },
737 { X86::MUL_Fp80m32 , X86::MUL_F32m },
738 { X86::MUL_Fp80m64 , X86::MUL_F64m },
739 { X86::MUL_FpI16m32 , X86::MUL_FI16m },
740 { X86::MUL_FpI16m64 , X86::MUL_FI16m },
741 { X86::MUL_FpI16m80 , X86::MUL_FI16m },
742 { X86::MUL_FpI32m32 , X86::MUL_FI32m },
743 { X86::MUL_FpI32m64 , X86::MUL_FI32m },
744 { X86::MUL_FpI32m80 , X86::MUL_FI32m },
745 { X86::SQRT_Fp32 , X86::SQRT_F },
746 { X86::SQRT_Fp64 , X86::SQRT_F },
747 { X86::SQRT_Fp80 , X86::SQRT_F },
748 { X86::ST_Fp32m , X86::ST_F32m },
749 { X86::ST_Fp64m , X86::ST_F64m },
750 { X86::ST_Fp64m32 , X86::ST_F32m },
751 { X86::ST_Fp80m32 , X86::ST_F32m },
752 { X86::ST_Fp80m64 , X86::ST_F64m },
753 { X86::ST_FpP80m , X86::ST_FP80m },
754 { X86::SUBR_Fp32m , X86::SUBR_F32m },
755 { X86::SUBR_Fp64m , X86::SUBR_F64m },
756 { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
757 { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
758 { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
759 { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
760 { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
761 { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
762 { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
763 { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
764 { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
765 { X86::SUB_Fp32m , X86::SUB_F32m },
766 { X86::SUB_Fp64m , X86::SUB_F64m },
767 { X86::SUB_Fp64m32 , X86::SUB_F32m },
768 { X86::SUB_Fp80m32 , X86::SUB_F32m },
769 { X86::SUB_Fp80m64 , X86::SUB_F64m },
770 { X86::SUB_FpI16m32 , X86::SUB_FI16m },
771 { X86::SUB_FpI16m64 , X86::SUB_FI16m },
772 { X86::SUB_FpI16m80 , X86::SUB_FI16m },
773 { X86::SUB_FpI32m32 , X86::SUB_FI32m },
774 { X86::SUB_FpI32m64 , X86::SUB_FI32m },
775 { X86::SUB_FpI32m80 , X86::SUB_FI32m },
776 { X86::TST_Fp32 , X86::TST_F },
777 { X86::TST_Fp64 , X86::TST_F },
778 { X86::TST_Fp80 , X86::TST_F },
779 { X86::UCOM_FpIr32 , X86::UCOM_FIr },
780 { X86::UCOM_FpIr64 , X86::UCOM_FIr },
781 { X86::UCOM_FpIr80 , X86::UCOM_FIr },
782 { X86::UCOM_Fpr32 , X86::UCOM_Fr },
783 { X86::UCOM_Fpr64 , X86::UCOM_Fr },
784 { X86::UCOM_Fpr80 , X86::UCOM_Fr },
785 { X86::XAM_Fp32 , X86::XAM_F },
786 { X86::XAM_Fp64 , X86::XAM_F },
787 { X86::XAM_Fp80 , X86::XAM_F },
788};
789
790static unsigned getConcreteOpcode(unsigned Opcode) {
792 int Opc = Lookup(OpcodeTable, Opcode);
793 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
794 return Opc;
795}
796
797//===----------------------------------------------------------------------===//
798// Helper Methods
799//===----------------------------------------------------------------------===//
800
801// PopTable - Sorted map of instructions to their popping version. The first
802// element is an instruction, the second is the version which pops.
803//
804static const TableEntry PopTable[] = {
805 { X86::ADD_FrST0 , X86::ADD_FPrST0 },
806
807 { X86::COMP_FST0r, X86::FCOMPP },
808 { X86::COM_FIr , X86::COM_FIPr },
809 { X86::COM_FST0r , X86::COMP_FST0r },
810
811 { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
812 { X86::DIV_FrST0 , X86::DIV_FPrST0 },
813
814 { X86::IST_F16m , X86::IST_FP16m },
815 { X86::IST_F32m , X86::IST_FP32m },
816
817 { X86::MUL_FrST0 , X86::MUL_FPrST0 },
818
819 { X86::ST_F32m , X86::ST_FP32m },
820 { X86::ST_F64m , X86::ST_FP64m },
821 { X86::ST_Frr , X86::ST_FPrr },
822
823 { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
824 { X86::SUB_FrST0 , X86::SUB_FPrST0 },
825
826 { X86::UCOM_FIr , X86::UCOM_FIPr },
827
828 { X86::UCOM_FPr , X86::UCOM_FPPr },
829 { X86::UCOM_Fr , X86::UCOM_FPr },
830};
831
833 if (const MachineOperand *MO = MI.findRegisterDefOperand(X86::FPSW))
834 if (!MO->isDead())
835 return true;
836 return false;
837}
838
841 MachineBasicBlock &MBB = *I->getParent();
842 while (++I != MBB.end()) {
843 MachineInstr &MI = *I;
845 return I;
846 }
847 return MBB.end();
848}
849
850/// popStackAfter - Pop the current value off of the top of the FP stack after
851/// the specified instruction. This attempts to be sneaky and combine the pop
852/// into the instruction itself if possible. The iterator is left pointing to
853/// the last instruction, be it a new pop instruction inserted, or the old
854/// instruction if it was modified in place.
855///
856void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
857 MachineInstr &MI = *I;
858 const DebugLoc &dl = MI.getDebugLoc();
860
861 popReg();
862
863 // Check to see if there is a popping version of this instruction...
864 int Opcode = Lookup(PopTable, I->getOpcode());
865 if (Opcode != -1) {
866 I->setDesc(TII->get(Opcode));
867 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
868 I->removeOperand(0);
869 MI.dropDebugNumber();
870 } else { // Insert an explicit pop
871 // If this instruction sets FPSW, which is read in following instruction,
872 // insert pop after that reader.
874 MachineBasicBlock &MBB = *MI.getParent();
876 if (Next != MBB.end() && Next->readsRegister(X86::FPSW))
877 I = Next;
878 }
879 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
880 }
881}
882
883/// freeStackSlotAfter - Free the specified register from the register stack, so
884/// that it is no longer in a register. If the register is currently at the top
885/// of the stack, we just pop the current instruction, otherwise we store the
886/// current top-of-stack into the specified slot, then pop the top of stack.
887void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
888 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
889 popStackAfter(I);
890 return;
891 }
892
893 // Otherwise, store the top of stack into the dead slot, killing the operand
894 // without having to add in an explicit xchg then pop.
895 //
896 I = freeStackSlotBefore(++I, FPRegNo);
897}
898
899/// freeStackSlotBefore - Free the specified register without trying any
900/// folding.
902FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
903 unsigned STReg = getSTReg(FPRegNo);
904 unsigned OldSlot = getSlot(FPRegNo);
905 unsigned TopReg = Stack[StackTop-1];
906 Stack[OldSlot] = TopReg;
907 RegMap[TopReg] = OldSlot;
908 RegMap[FPRegNo] = ~0;
909 Stack[--StackTop] = ~0;
910 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
911 .addReg(STReg)
912 .getInstr();
913}
914
915/// adjustLiveRegs - Kill and revive registers such that exactly the FP
916/// registers with a bit in Mask are live.
917void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
918 unsigned Defs = Mask;
919 unsigned Kills = 0;
920 for (unsigned i = 0; i < StackTop; ++i) {
921 unsigned RegNo = Stack[i];
922 if (!(Defs & (1 << RegNo)))
923 // This register is live, but we don't want it.
924 Kills |= (1 << RegNo);
925 else
926 // We don't need to imp-def this live register.
927 Defs &= ~(1 << RegNo);
928 }
929 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
930
931 // Produce implicit-defs for free by using killed registers.
932 while (Kills && Defs) {
933 unsigned KReg = llvm::countr_zero(Kills);
934 unsigned DReg = llvm::countr_zero(Defs);
935 LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg
936 << "\n");
937 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
938 std::swap(RegMap[KReg], RegMap[DReg]);
939 Kills &= ~(1 << KReg);
940 Defs &= ~(1 << DReg);
941 }
942
943 // Kill registers by popping.
944 if (Kills && I != MBB->begin()) {
945 MachineBasicBlock::iterator I2 = std::prev(I);
946 while (StackTop) {
947 unsigned KReg = getStackEntry(0);
948 if (!(Kills & (1 << KReg)))
949 break;
950 LLVM_DEBUG(dbgs() << "Popping %fp" << KReg << "\n");
951 popStackAfter(I2);
952 Kills &= ~(1 << KReg);
953 }
954 }
955
956 // Manually kill the rest.
957 while (Kills) {
958 unsigned KReg = llvm::countr_zero(Kills);
959 LLVM_DEBUG(dbgs() << "Killing %fp" << KReg << "\n");
960 freeStackSlotBefore(I, KReg);
961 Kills &= ~(1 << KReg);
962 }
963
964 // Load zeros for all the imp-defs.
965 while(Defs) {
966 unsigned DReg = llvm::countr_zero(Defs);
967 LLVM_DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n");
968 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
969 pushReg(DReg);
970 Defs &= ~(1 << DReg);
971 }
972
973 // Now we should have the correct registers live.
974 LLVM_DEBUG(dumpStack());
975 assert(StackTop == (unsigned)llvm::popcount(Mask) && "Live count mismatch");
976}
977
978/// shuffleStackTop - emit fxch instructions before I to shuffle the top
979/// FixCount entries into the order given by FixStack.
980/// FIXME: Is there a better algorithm than insertion sort?
981void FPS::shuffleStackTop(const unsigned char *FixStack,
982 unsigned FixCount,
984 // Move items into place, starting from the desired stack bottom.
985 while (FixCount--) {
986 // Old register at position FixCount.
987 unsigned OldReg = getStackEntry(FixCount);
988 // Desired register at position FixCount.
989 unsigned Reg = FixStack[FixCount];
990 if (Reg == OldReg)
991 continue;
992 // (Reg st0) (OldReg st0) = (Reg OldReg st0)
993 moveToTop(Reg, I);
994 if (FixCount > 0)
995 moveToTop(OldReg, I);
996 }
997 LLVM_DEBUG(dumpStack());
998}
999
1000
1001//===----------------------------------------------------------------------===//
1002// Instruction transformation implementation
1003//===----------------------------------------------------------------------===//
1004
1005void FPS::handleCall(MachineBasicBlock::iterator &I) {
1006 MachineInstr &MI = *I;
1007 unsigned STReturns = 0;
1008
1009 bool ClobbersFPStack = false;
1010 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1011 MachineOperand &Op = MI.getOperand(i);
1012 // Check if this call clobbers the FP stack.
1013 // is sufficient.
1014 if (Op.isRegMask()) {
1015 bool ClobbersFP0 = Op.clobbersPhysReg(X86::FP0);
1016#ifndef NDEBUG
1017 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
1018 for (unsigned i = 1; i != 8; ++i)
1019 assert(Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1020 "Inconsistent FP register clobber");
1021#endif
1022
1023 if (ClobbersFP0)
1024 ClobbersFPStack = true;
1025 }
1026
1027 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1028 continue;
1029
1030 assert(Op.isImplicit() && "Expected implicit def/use");
1031
1032 if (Op.isDef())
1033 STReturns |= 1 << getFPReg(Op);
1034
1035 // Remove the operand so that later passes don't see it.
1036 MI.removeOperand(i);
1037 --i;
1038 --e;
1039 }
1040
1041 // Most calls should have a regmask that clobbers the FP registers. If it
1042 // isn't present then the register allocator didn't spill the FP registers
1043 // so they are still on the stack.
1044 assert((ClobbersFPStack || STReturns == 0) &&
1045 "ST returns without FP stack clobber");
1046 if (!ClobbersFPStack)
1047 return;
1048
1049 unsigned N = llvm::countr_one(STReturns);
1050
1051 // FP registers used for function return must be consecutive starting at
1052 // FP0
1053 assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
1054
1055 // Reset the FP Stack - It is required because of possible leftovers from
1056 // passed arguments. The caller should assume that the FP stack is
1057 // returned empty (unless the callee returns values on FP stack).
1058 while (StackTop > 0)
1059 popReg();
1060
1061 for (unsigned I = 0; I < N; ++I)
1062 pushReg(N - I - 1);
1063
1064 // If this call has been modified, drop all variable values defined by it.
1065 // We can't track them once they've been stackified.
1066 if (STReturns)
1067 I->dropDebugNumber();
1068}
1069
1070/// If RET has an FP register use operand, pass the first one in ST(0) and
1071/// the second one in ST(1).
1072void FPS::handleReturn(MachineBasicBlock::iterator &I) {
1073 MachineInstr &MI = *I;
1074
1075 // Find the register operands.
1076 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1077 unsigned LiveMask = 0;
1078
1079 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1080 MachineOperand &Op = MI.getOperand(i);
1081 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1082 continue;
1083 // FP Register uses must be kills unless there are two uses of the same
1084 // register, in which case only one will be a kill.
1085 assert(Op.isUse() &&
1086 (Op.isKill() || // Marked kill.
1087 getFPReg(Op) == FirstFPRegOp || // Second instance.
1088 MI.killsRegister(Op.getReg())) && // Later use is marked kill.
1089 "Ret only defs operands, and values aren't live beyond it");
1090
1091 if (FirstFPRegOp == ~0U)
1092 FirstFPRegOp = getFPReg(Op);
1093 else {
1094 assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1095 SecondFPRegOp = getFPReg(Op);
1096 }
1097 LiveMask |= (1 << getFPReg(Op));
1098
1099 // Remove the operand so that later passes don't see it.
1100 MI.removeOperand(i);
1101 --i;
1102 --e;
1103 }
1104
1105 // We may have been carrying spurious live-ins, so make sure only the
1106 // returned registers are left live.
1107 adjustLiveRegs(LiveMask, MI);
1108 if (!LiveMask) return; // Quick check to see if any are possible.
1109
1110 // There are only four possibilities here:
1111 // 1) we are returning a single FP value. In this case, it has to be in
1112 // ST(0) already, so just declare success by removing the value from the
1113 // FP Stack.
1114 if (SecondFPRegOp == ~0U) {
1115 // Assert that the top of stack contains the right FP register.
1116 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1117 "Top of stack not the right register for RET!");
1118
1119 // Ok, everything is good, mark the value as not being on the stack
1120 // anymore so that our assertion about the stack being empty at end of
1121 // block doesn't fire.
1122 StackTop = 0;
1123 return;
1124 }
1125
1126 // Otherwise, we are returning two values:
1127 // 2) If returning the same value for both, we only have one thing in the FP
1128 // stack. Consider: RET FP1, FP1
1129 if (StackTop == 1) {
1130 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
1131 "Stack misconfiguration for RET!");
1132
1133 // Duplicate the TOS so that we return it twice. Just pick some other FPx
1134 // register to hold it.
1135 unsigned NewReg = ScratchFPReg;
1136 duplicateToTop(FirstFPRegOp, NewReg, MI);
1137 FirstFPRegOp = NewReg;
1138 }
1139
1140 /// Okay we know we have two different FPx operands now:
1141 assert(StackTop == 2 && "Must have two values live!");
1142
1143 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1144 /// in ST(1). In this case, emit an fxch.
1145 if (getStackEntry(0) == SecondFPRegOp) {
1146 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1147 moveToTop(FirstFPRegOp, MI);
1148 }
1149
1150 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1151 /// ST(1). Just remove both from our understanding of the stack and return.
1152 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1153 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1154 StackTop = 0;
1155}
1156
1157/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
1158///
1159void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
1160 MachineInstr &MI = *I;
1161 unsigned DestReg = getFPReg(MI.getOperand(0));
1162
1163 // Change from the pseudo instruction to the concrete instruction.
1164 MI.removeOperand(0); // Remove the explicit ST(0) operand
1165 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1166 MI.addOperand(
1167 MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true));
1168
1169 // Result gets pushed on the stack.
1170 pushReg(DestReg);
1171
1172 MI.dropDebugNumber();
1173}
1174
1175/// handleOneArgFP - fst <mem>, ST(0)
1176///
1177void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
1178 MachineInstr &MI = *I;
1179 unsigned NumOps = MI.getDesc().getNumOperands();
1180 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
1181 "Can only handle fst* & ftst instructions!");
1182
1183 // Is this the last use of the source register?
1184 unsigned Reg = getFPReg(MI.getOperand(NumOps - 1));
1185 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg);
1186
1187 // FISTP64m is strange because there isn't a non-popping versions.
1188 // If we have one _and_ we don't want to pop the operand, duplicate the value
1189 // on the stack instead of moving it. This ensure that popping the value is
1190 // always ok.
1191 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
1192 //
1193 if (!KillsSrc && (MI.getOpcode() == X86::IST_Fp64m32 ||
1194 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1195 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1196 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1197 MI.getOpcode() == X86::IST_Fp64m64 ||
1198 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1199 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1200 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1201 MI.getOpcode() == X86::IST_Fp64m80 ||
1202 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1203 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1204 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1205 MI.getOpcode() == X86::ST_FpP80m)) {
1206 duplicateToTop(Reg, ScratchFPReg, I);
1207 } else {
1208 moveToTop(Reg, I); // Move to the top of the stack...
1209 }
1210
1211 // Convert from the pseudo instruction to the concrete instruction.
1212 MI.removeOperand(NumOps - 1); // Remove explicit ST(0) operand
1213 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1214 MI.addOperand(
1215 MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true));
1216
1217 if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||
1218 MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||
1219 MI.getOpcode() == X86::ST_FP80m) {
1220 if (StackTop == 0)
1221 report_fatal_error("Stack empty??");
1222 --StackTop;
1223 } else if (KillsSrc) { // Last use of operand?
1224 popStackAfter(I);
1225 }
1226
1227 MI.dropDebugNumber();
1228}
1229
1230
1231/// handleOneArgFPRW: Handle instructions that read from the top of stack and
1232/// replace the value with a newly computed value. These instructions may have
1233/// non-fp operands after their FP operands.
1234///
1235/// Examples:
1236/// R1 = fchs R2
1237/// R1 = fadd R2, [mem]
1238///
1239void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
1240 MachineInstr &MI = *I;
1241#ifndef NDEBUG
1242 unsigned NumOps = MI.getDesc().getNumOperands();
1243 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1244#endif
1245
1246 // Is this the last use of the source register?
1247 unsigned Reg = getFPReg(MI.getOperand(1));
1248 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg);
1249
1250 if (KillsSrc) {
1251 // If this is the last use of the source register, just make sure it's on
1252 // the top of the stack.
1253 moveToTop(Reg, I);
1254 if (StackTop == 0)
1255 report_fatal_error("Stack cannot be empty!");
1256 --StackTop;
1257 pushReg(getFPReg(MI.getOperand(0)));
1258 } else {
1259 // If this is not the last use of the source register, _copy_ it to the top
1260 // of the stack.
1261 duplicateToTop(Reg, getFPReg(MI.getOperand(0)), I);
1262 }
1263
1264 // Change from the pseudo instruction to the concrete instruction.
1265 MI.removeOperand(1); // Drop the source operand.
1266 MI.removeOperand(0); // Drop the destination operand.
1267 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1268 MI.dropDebugNumber();
1269}
1270
1271
1272//===----------------------------------------------------------------------===//
1273// Define tables of various ways to map pseudo instructions
1274//
1275
1276// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
1277static const TableEntry ForwardST0Table[] = {
1278 { X86::ADD_Fp32 , X86::ADD_FST0r },
1279 { X86::ADD_Fp64 , X86::ADD_FST0r },
1280 { X86::ADD_Fp80 , X86::ADD_FST0r },
1281 { X86::DIV_Fp32 , X86::DIV_FST0r },
1282 { X86::DIV_Fp64 , X86::DIV_FST0r },
1283 { X86::DIV_Fp80 , X86::DIV_FST0r },
1284 { X86::MUL_Fp32 , X86::MUL_FST0r },
1285 { X86::MUL_Fp64 , X86::MUL_FST0r },
1286 { X86::MUL_Fp80 , X86::MUL_FST0r },
1287 { X86::SUB_Fp32 , X86::SUB_FST0r },
1288 { X86::SUB_Fp64 , X86::SUB_FST0r },
1289 { X86::SUB_Fp80 , X86::SUB_FST0r },
1290};
1291
1292// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
1293static const TableEntry ReverseST0Table[] = {
1294 { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
1295 { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
1296 { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
1297 { X86::DIV_Fp32 , X86::DIVR_FST0r },
1298 { X86::DIV_Fp64 , X86::DIVR_FST0r },
1299 { X86::DIV_Fp80 , X86::DIVR_FST0r },
1300 { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
1301 { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
1302 { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
1303 { X86::SUB_Fp32 , X86::SUBR_FST0r },
1304 { X86::SUB_Fp64 , X86::SUBR_FST0r },
1305 { X86::SUB_Fp80 , X86::SUBR_FST0r },
1306};
1307
1308// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
1309static const TableEntry ForwardSTiTable[] = {
1310 { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
1311 { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
1312 { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
1313 { X86::DIV_Fp32 , X86::DIVR_FrST0 },
1314 { X86::DIV_Fp64 , X86::DIVR_FrST0 },
1315 { X86::DIV_Fp80 , X86::DIVR_FrST0 },
1316 { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
1317 { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
1318 { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
1319 { X86::SUB_Fp32 , X86::SUBR_FrST0 },
1320 { X86::SUB_Fp64 , X86::SUBR_FrST0 },
1321 { X86::SUB_Fp80 , X86::SUBR_FrST0 },
1322};
1323
1324// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
1325static const TableEntry ReverseSTiTable[] = {
1326 { X86::ADD_Fp32 , X86::ADD_FrST0 },
1327 { X86::ADD_Fp64 , X86::ADD_FrST0 },
1328 { X86::ADD_Fp80 , X86::ADD_FrST0 },
1329 { X86::DIV_Fp32 , X86::DIV_FrST0 },
1330 { X86::DIV_Fp64 , X86::DIV_FrST0 },
1331 { X86::DIV_Fp80 , X86::DIV_FrST0 },
1332 { X86::MUL_Fp32 , X86::MUL_FrST0 },
1333 { X86::MUL_Fp64 , X86::MUL_FrST0 },
1334 { X86::MUL_Fp80 , X86::MUL_FrST0 },
1335 { X86::SUB_Fp32 , X86::SUB_FrST0 },
1336 { X86::SUB_Fp64 , X86::SUB_FrST0 },
1337 { X86::SUB_Fp80 , X86::SUB_FrST0 },
1338};
1339
1340
1341/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1342/// instructions which need to be simplified and possibly transformed.
1343///
1344/// Result: ST(0) = fsub ST(0), ST(i)
1345/// ST(i) = fsub ST(0), ST(i)
1346/// ST(0) = fsubr ST(0), ST(i)
1347/// ST(i) = fsubr ST(0), ST(i)
1348///
1349void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
1352 MachineInstr &MI = *I;
1353
1354 unsigned NumOperands = MI.getDesc().getNumOperands();
1355 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1356 unsigned Dest = getFPReg(MI.getOperand(0));
1357 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1358 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1359 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0);
1360 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1);
1361 const DebugLoc &dl = MI.getDebugLoc();
1362
1363 unsigned TOS = getStackEntry(0);
1364
1365 // One of our operands must be on the top of the stack. If neither is yet, we
1366 // need to move one.
1367 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
1368 // We can choose to move either operand to the top of the stack. If one of
1369 // the operands is killed by this instruction, we want that one so that we
1370 // can update right on top of the old version.
1371 if (KillsOp0) {
1372 moveToTop(Op0, I); // Move dead operand to TOS.
1373 TOS = Op0;
1374 } else if (KillsOp1) {
1375 moveToTop(Op1, I);
1376 TOS = Op1;
1377 } else {
1378 // All of the operands are live after this instruction executes, so we
1379 // cannot update on top of any operand. Because of this, we must
1380 // duplicate one of the stack elements to the top. It doesn't matter
1381 // which one we pick.
1382 //
1383 duplicateToTop(Op0, Dest, I);
1384 Op0 = TOS = Dest;
1385 KillsOp0 = true;
1386 }
1387 } else if (!KillsOp0 && !KillsOp1) {
1388 // If we DO have one of our operands at the top of the stack, but we don't
1389 // have a dead operand, we must duplicate one of the operands to a new slot
1390 // on the stack.
1391 duplicateToTop(Op0, Dest, I);
1392 Op0 = TOS = Dest;
1393 KillsOp0 = true;
1394 }
1395
1396 // Now we know that one of our operands is on the top of the stack, and at
1397 // least one of our operands is killed by this instruction.
1398 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1399 "Stack conditions not set up right!");
1400
1401 // We decide which form to use based on what is on the top of the stack, and
1402 // which operand is killed by this instruction.
1403 ArrayRef<TableEntry> InstTable;
1404 bool isForward = TOS == Op0;
1405 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1406 if (updateST0) {
1407 if (isForward)
1408 InstTable = ForwardST0Table;
1409 else
1410 InstTable = ReverseST0Table;
1411 } else {
1412 if (isForward)
1413 InstTable = ForwardSTiTable;
1414 else
1415 InstTable = ReverseSTiTable;
1416 }
1417
1418 int Opcode = Lookup(InstTable, MI.getOpcode());
1419 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1420
1421 // NotTOS - The register which is not on the top of stack...
1422 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1423
1424 // Replace the old instruction with a new instruction
1425 MBB->remove(&*I++);
1426 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
1427
1428 if (!MI.mayRaiseFPException())
1429 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1430
1431 // If both operands are killed, pop one off of the stack in addition to
1432 // overwriting the other one.
1433 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1434 assert(!updateST0 && "Should have updated other operand!");
1435 popStackAfter(I); // Pop the top of stack
1436 }
1437
1438 // Update stack information so that we know the destination register is now on
1439 // the stack.
1440 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1441 assert(UpdatedSlot < StackTop && Dest < 7);
1442 Stack[UpdatedSlot] = Dest;
1443 RegMap[Dest] = UpdatedSlot;
1444 MBB->getParent()->deleteMachineInstr(&MI); // Remove the old instruction
1445}
1446
1447/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1448/// register arguments and no explicit destinations.
1449///
1450void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
1451 MachineInstr &MI = *I;
1452
1453 unsigned NumOperands = MI.getDesc().getNumOperands();
1454 assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1455 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1456 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1457 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0);
1458 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1);
1459
1460 // Make sure the first operand is on the top of stack, the other one can be
1461 // anywhere.
1462 moveToTop(Op0, I);
1463
1464 // Change from the pseudo instruction to the concrete instruction.
1465 MI.getOperand(0).setReg(getSTReg(Op1));
1466 MI.removeOperand(1);
1467 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1468 MI.dropDebugNumber();
1469
1470 // If any of the operands are killed by this instruction, free them.
1471 if (KillsOp0) freeStackSlotAfter(I, Op0);
1472 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
1473}
1474
1475/// handleCondMovFP - Handle two address conditional move instructions. These
1476/// instructions move a st(i) register to st(0) iff a condition is true. These
1477/// instructions require that the first operand is at the top of the stack, but
1478/// otherwise don't modify the stack at all.
1479void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
1480 MachineInstr &MI = *I;
1481
1482 unsigned Op0 = getFPReg(MI.getOperand(0));
1483 unsigned Op1 = getFPReg(MI.getOperand(2));
1484 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1);
1485
1486 // The first operand *must* be on the top of the stack.
1487 moveToTop(Op0, I);
1488
1489 // Change the second operand to the stack register that the operand is in.
1490 // Change from the pseudo instruction to the concrete instruction.
1491 MI.removeOperand(0);
1492 MI.removeOperand(1);
1493 MI.getOperand(0).setReg(getSTReg(Op1));
1494 MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
1495 MI.dropDebugNumber();
1496
1497 // If we kill the second operand, make sure to pop it from the stack.
1498 if (Op0 != Op1 && KillsOp1) {
1499 // Get this value off of the register stack.
1500 freeStackSlotAfter(I, Op1);
1501 }
1502}
1503
1504
1505/// handleSpecialFP - Handle special instructions which behave unlike other
1506/// floating point instructions. This is primarily intended for use by pseudo
1507/// instructions.
1508///
1509void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
1510 MachineInstr &MI = *Inst;
1511
1512 if (MI.isCall()) {
1513 handleCall(Inst);
1514 return;
1515 }
1516
1517 if (MI.isReturn()) {
1518 handleReturn(Inst);
1519 return;
1520 }
1521
1522 switch (MI.getOpcode()) {
1523 default: llvm_unreachable("Unknown SpecialFP instruction!");
1524 case TargetOpcode::COPY: {
1525 // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1526 const MachineOperand &MO1 = MI.getOperand(1);
1527 const MachineOperand &MO0 = MI.getOperand(0);
1528 bool KillsSrc = MI.killsRegister(MO1.getReg());
1529
1530 // FP <- FP copy.
1531 unsigned DstFP = getFPReg(MO0);
1532 unsigned SrcFP = getFPReg(MO1);
1533 assert(isLive(SrcFP) && "Cannot copy dead register");
1534 if (KillsSrc) {
1535 // If the input operand is killed, we can just change the owner of the
1536 // incoming stack slot into the result.
1537 unsigned Slot = getSlot(SrcFP);
1538 Stack[Slot] = DstFP;
1539 RegMap[DstFP] = Slot;
1540 } else {
1541 // For COPY we just duplicate the specified value to a new stack slot.
1542 // This could be made better, but would require substantial changes.
1543 duplicateToTop(SrcFP, DstFP, Inst);
1544 }
1545 break;
1546 }
1547
1548 case TargetOpcode::IMPLICIT_DEF: {
1549 // All FP registers must be explicitly defined, so load a 0 instead.
1550 unsigned Reg = MI.getOperand(0).getReg() - X86::FP0;
1551 LLVM_DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1552 BuildMI(*MBB, Inst, MI.getDebugLoc(), TII->get(X86::LD_F0));
1553 pushReg(Reg);
1554 break;
1555 }
1556
1557 case TargetOpcode::INLINEASM:
1558 case TargetOpcode::INLINEASM_BR: {
1559 // The inline asm MachineInstr currently only *uses* FP registers for the
1560 // 'f' constraint. These should be turned into the current ST(x) register
1561 // in the machine instr.
1562 //
1563 // There are special rules for x87 inline assembly. The compiler must know
1564 // exactly how many registers are popped and pushed implicitly by the asm.
1565 // Otherwise it is not possible to restore the stack state after the inline
1566 // asm.
1567 //
1568 // There are 3 kinds of input operands:
1569 //
1570 // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1571 // popped input operand must be in a fixed stack slot, and it is either
1572 // tied to an output operand, or in the clobber list. The MI has ST use
1573 // and def operands for these inputs.
1574 //
1575 // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1576 // preserved by the inline asm. The fixed stack slots must be STn-STm
1577 // following the popped inputs. A fixed input operand cannot be tied to
1578 // an output or appear in the clobber list. The MI has ST use operands
1579 // and no defs for these inputs.
1580 //
1581 // 3. Preserved inputs. These inputs use the "f" constraint which is
1582 // represented as an FP register. The inline asm won't change these
1583 // stack slots.
1584 //
1585 // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1586 // registers do not count as output operands. The inline asm changes the
1587 // stack as if it popped all the popped inputs and then pushed all the
1588 // output operands.
1589
1590 // Scan the assembly for ST registers used, defined and clobbered. We can
1591 // only tell clobbers from defs by looking at the asm descriptor.
1592 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1593 unsigned NumOps = 0;
1594 SmallSet<unsigned, 1> FRegIdx;
1595 unsigned RCID;
1596
1597 for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI.getNumOperands();
1598 i != e && MI.getOperand(i).isImm(); i += 1 + NumOps) {
1599 unsigned Flags = MI.getOperand(i).getImm();
1600 const InlineAsm::Flag F(Flags);
1601
1602 NumOps = F.getNumOperandRegisters();
1603 if (NumOps != 1)
1604 continue;
1605 const MachineOperand &MO = MI.getOperand(i + 1);
1606 if (!MO.isReg())
1607 continue;
1608 unsigned STReg = MO.getReg() - X86::FP0;
1609 if (STReg >= 8)
1610 continue;
1611
1612 // If the flag has a register class constraint, this must be an operand
1613 // with constraint "f". Record its index and continue.
1614 if (F.hasRegClassConstraint(RCID)) {
1615 FRegIdx.insert(i + 1);
1616 continue;
1617 }
1618
1619 switch (F.getKind()) {
1620 case InlineAsm::Kind::RegUse:
1621 STUses |= (1u << STReg);
1622 break;
1623 case InlineAsm::Kind::RegDef:
1624 case InlineAsm::Kind::RegDefEarlyClobber:
1625 STDefs |= (1u << STReg);
1626 break;
1627 case InlineAsm::Kind::Clobber:
1628 STClobbers |= (1u << STReg);
1629 break;
1630 default:
1631 break;
1632 }
1633 }
1634
1635 if (STUses && !isMask_32(STUses))
1636 MI.emitError("fixed input regs must be last on the x87 stack");
1637 unsigned NumSTUses = llvm::countr_one(STUses);
1638
1639 // Defs must be contiguous from the stack top. ST0-STn.
1640 if (STDefs && !isMask_32(STDefs)) {
1641 MI.emitError("output regs must be last on the x87 stack");
1642 STDefs = NextPowerOf2(STDefs) - 1;
1643 }
1644 unsigned NumSTDefs = llvm::countr_one(STDefs);
1645
1646 // So must the clobbered stack slots. ST0-STm, m >= n.
1647 if (STClobbers && !isMask_32(STDefs | STClobbers))
1648 MI.emitError("clobbers must be last on the x87 stack");
1649
1650 // Popped inputs are the ones that are also clobbered or defined.
1651 unsigned STPopped = STUses & (STDefs | STClobbers);
1652 if (STPopped && !isMask_32(STPopped))
1653 MI.emitError("implicitly popped regs must be last on the x87 stack");
1654 unsigned NumSTPopped = llvm::countr_one(STPopped);
1655
1656 LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1657 << NumSTPopped << ", and defines " << NumSTDefs
1658 << " regs.\n");
1659
1660#ifndef NDEBUG
1661 // If any input operand uses constraint "f", all output register
1662 // constraints must be early-clobber defs.
1663 for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I)
1664 if (FRegIdx.count(I)) {
1665 assert((1 << getFPReg(MI.getOperand(I)) & STDefs) == 0 &&
1666 "Operands with constraint \"f\" cannot overlap with defs");
1667 }
1668#endif
1669
1670 // Collect all FP registers (register operands with constraints "t", "u",
1671 // and "f") to kill afer the instruction.
1672 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1673 for (const MachineOperand &Op : MI.operands()) {
1674 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1675 continue;
1676 unsigned FPReg = getFPReg(Op);
1677
1678 // If we kill this operand, make sure to pop it from the stack after the
1679 // asm. We just remember it for now, and pop them all off at the end in
1680 // a batch.
1681 if (Op.isUse() && Op.isKill())
1682 FPKills |= 1U << FPReg;
1683 }
1684
1685 // Do not include registers that are implicitly popped by defs/clobbers.
1686 FPKills &= ~(STDefs | STClobbers);
1687
1688 // Now we can rearrange the live registers to match what was requested.
1689 unsigned char STUsesArray[8];
1690
1691 for (unsigned I = 0; I < NumSTUses; ++I)
1692 STUsesArray[I] = I;
1693
1694 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1695 LLVM_DEBUG({
1696 dbgs() << "Before asm: ";
1697 dumpStack();
1698 });
1699
1700 // With the stack layout fixed, rewrite the FP registers.
1701 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1702 MachineOperand &Op = MI.getOperand(i);
1703 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1704 continue;
1705
1706 unsigned FPReg = getFPReg(Op);
1707
1708 if (FRegIdx.count(i))
1709 // Operand with constraint "f".
1710 Op.setReg(getSTReg(FPReg));
1711 else
1712 // Operand with a single register class constraint ("t" or "u").
1713 Op.setReg(X86::ST0 + FPReg);
1714 }
1715
1716 // Simulate the inline asm popping its inputs and pushing its outputs.
1717 StackTop -= NumSTPopped;
1718
1719 for (unsigned i = 0; i < NumSTDefs; ++i)
1720 pushReg(NumSTDefs - i - 1);
1721
1722 // If this asm kills any FP registers (is the last use of them) we must
1723 // explicitly emit pop instructions for them. Do this now after the asm has
1724 // executed so that the ST(x) numbers are not off (which would happen if we
1725 // did this inline with operand rewriting).
1726 //
1727 // Note: this might be a non-optimal pop sequence. We might be able to do
1728 // better by trying to pop in stack order or something.
1729 while (FPKills) {
1730 unsigned FPReg = llvm::countr_zero(FPKills);
1731 if (isLive(FPReg))
1732 freeStackSlotAfter(Inst, FPReg);
1733 FPKills &= ~(1U << FPReg);
1734 }
1735
1736 // Don't delete the inline asm!
1737 return;
1738 }
1739 }
1740
1741 Inst = MBB->erase(Inst); // Remove the pseudo instruction
1742
1743 // We want to leave I pointing to the previous instruction, but what if we
1744 // just erased the first instruction?
1745 if (Inst == MBB->begin()) {
1746 LLVM_DEBUG(dbgs() << "Inserting dummy KILL\n");
1747 Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
1748 } else
1749 --Inst;
1750}
1751
1752void FPS::setKillFlags(MachineBasicBlock &MBB) const {
1753 const TargetRegisterInfo &TRI =
1755 LivePhysRegs LPR(TRI);
1756
1757 LPR.addLiveOuts(MBB);
1758
1759 for (MachineInstr &MI : llvm::reverse(MBB)) {
1760 if (MI.isDebugInstr())
1761 continue;
1762
1763 std::bitset<8> Defs;
1765
1766 for (auto &MO : MI.operands()) {
1767 if (!MO.isReg())
1768 continue;
1769
1770 unsigned Reg = MO.getReg() - X86::FP0;
1771
1772 if (Reg >= 8)
1773 continue;
1774
1775 if (MO.isDef()) {
1776 Defs.set(Reg);
1777 if (!LPR.contains(MO.getReg()))
1778 MO.setIsDead();
1779 } else
1780 Uses.push_back(&MO);
1781 }
1782
1783 for (auto *MO : Uses)
1784 if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg()))
1785 MO->setIsKill();
1786
1787 LPR.stepBackward(MI);
1788 }
1789}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:184
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static const TableEntry ReverseST0Table[]
X86 FP Stackifier
#define ASSERT_SORTED(TABLE)
static const TableEntry ForwardST0Table[]
static bool doesInstructionSetFPSW(MachineInstr &MI)
static unsigned getFPReg(const MachineOperand &MO)
getFPReg - Return the X86::FPx register number for the specified operand.
static const TableEntry ForwardSTiTable[]
static const TableEntry OpcodeTable[]
static const TableEntry ReverseSTiTable[]
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static const TableEntry PopTable[]
static unsigned getConcreteOpcode(unsigned Opcode)
#define DEBUG_TYPE
static MachineBasicBlock::iterator getNextFPInstruction(MachineBasicBlock::iterator I)
static constexpr uint32_t Opcode
Definition: aarch32.h:200
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition: EdgeBundles.h:41
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition: EdgeBundles.h:44
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:50
livein_iterator livein_end() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LiveInVector::const_iterator livein_iterator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
livein_iterator livein_begin() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Remove the specified register from the live in set.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
unsigned size() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:68
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
size_type size() const
Definition: SmallPtrSet.h:93
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
Definition: CallingConv.h:200
Reg
All possible values of the reg field in the ModR/M byte.
@ SpecialFP
SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
Definition: X86BaseInfo.h:808
@ NotFP
NotFP - The default, set for instructions that do not use FP registers.
Definition: X86BaseInfo.h:790
@ OneArgFPRW
OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a result back to ST(0).
Definition: X86BaseInfo.h:797
@ ZeroArgFP
ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0.
Definition: X86BaseInfo.h:792
@ OneArgFP
OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst.
Definition: X86BaseInfo.h:794
@ CompareFP
CompareFP - 2 arg FP instructions which implicitly read ST(0) and an explicit argument,...
Definition: X86BaseInfo.h:804
@ CondMovFP
CondMovFP - "2 operand" floating point conditional move instructions.
Definition: X86BaseInfo.h:806
@ TwoArgFP
TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an explicit argument,...
Definition: X86BaseInfo.h:801
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is X87 instruction.
@ AddrNumOperands
Definition: X86BaseInfo.h:36
constexpr double e
Definition: MathExtras.h:31
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createX86FloatingPointStackifierPass()
This function returns a pass which converts floating-point register references and pseudo instruction...
iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:384
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:306
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:240
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:214
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1945
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:349
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
std::pair< iterator, bool > insert(NodeRef N)