LLVM  10.0.0svn
PPCVSXFMAMutate.cpp
Go to the documentation of this file.
1 //===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass mutates the form of VSX FMA instructions to avoid unnecessary
10 // copies.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
38 
39 using namespace llvm;
40 
41 // Temporarily disable FMA mutation by default, since it doesn't handle
42 // cross-basic-block intervals well.
43 // See: http://lists.llvm.org/pipermail/llvm-dev/2016-February/095669.html
44 // http://reviews.llvm.org/D17087
46  "disable-ppc-vsx-fma-mutation",
47  cl::desc("Disable VSX FMA instruction mutation"), cl::init(true),
48  cl::Hidden);
49 
#define DEBUG_TYPE "ppc-vsx-fma-mutate"

// Forward declaration only; the definition is not in this file. The pass
// treats a return value of -1 as "this opcode has no alternate FMA form".
namespace llvm {
namespace PPC {

int getAltVSXFMAOpcode(uint16_t Opcode);

} // end namespace PPC
} // end namespace llvm
55 
56 namespace {
57  // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers
58  // (Altivec and scalar floating-point registers), we need to transform the
59  // copies into subregister copies with other restrictions.
60  struct PPCVSXFMAMutate : public MachineFunctionPass {
61  static char ID;
62  PPCVSXFMAMutate() : MachineFunctionPass(ID) {
64  }
65 
66  LiveIntervals *LIS;
67  const PPCInstrInfo *TII;
68 
69 protected:
70  bool processBlock(MachineBasicBlock &MBB) {
71  bool Changed = false;
72 
74  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
75  for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
76  I != IE; ++I) {
77  MachineInstr &MI = *I;
78 
79  // The default (A-type) VSX FMA form kills the addend (it is taken from
80  // the target register, which is then updated to reflect the result of
81  // the FMA). If the instruction, however, kills one of the registers
82  // used for the product, then we can use the M-form instruction (which
83  // will take that value from the to-be-defined register).
84 
85  int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
86  if (AltOpc == -1)
87  continue;
88 
89  // This pass is run after register coalescing, and so we're looking for
90  // a situation like this:
91  // ...
92  // %5 = COPY %9; VSLRC:%5,%9
93  // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
94  // implicit %rm; VSLRC:%5,%17,%16
95  // ...
96  // %9<def,tied1> = XSMADDADP %9<tied0>, %17, %19,
97  // implicit %rm; VSLRC:%9,%17,%19
98  // ...
99  // Where we can eliminate the copy by changing from the A-type to the
100  // M-type instruction. Specifically, for this example, this means:
101  // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
102  // implicit %rm; VSLRC:%5,%17,%16
103  // is replaced by:
104  // %16<def,tied1> = XSMADDMDP %16<tied0>, %18, %9,
105  // implicit %rm; VSLRC:%16,%18,%9
106  // and we remove: %5 = COPY %9; VSLRC:%5,%9
107 
108  SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
109 
110  VNInfo *AddendValNo =
111  LIS->getInterval(MI.getOperand(1).getReg()).Query(FMAIdx).valueIn();
112 
113  // This can be null if the register is undef.
114  if (!AddendValNo)
115  continue;
116 
117  MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
118 
119  // The addend and this instruction must be in the same block.
120 
121  if (!AddendMI || AddendMI->getParent() != MI.getParent())
122  continue;
123 
124  // The addend must be a full copy within the same register class.
125 
126  if (!AddendMI->isFullCopy())
127  continue;
128 
129  Register AddendSrcReg = AddendMI->getOperand(1).getReg();
130  if (Register::isVirtualRegister(AddendSrcReg)) {
131  if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
132  MRI.getRegClass(AddendSrcReg))
133  continue;
134  } else {
135  // If AddendSrcReg is a physical register, make sure the destination
136  // register class contains it.
137  if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
138  ->contains(AddendSrcReg))
139  continue;
140  }
141 
142  // In theory, there could be other uses of the addend copy before this
143  // fma. We could deal with this, but that would require additional
144  // logic below and I suspect it will not occur in any relevant
145  // situations. Additionally, check whether the copy source is killed
146  // prior to the fma. In order to replace the addend here with the
147  // source of the copy, it must still be live here. We can't use
148  // interval testing for a physical register, so as long as we're
149  // walking the MIs we may as well test liveness here.
150  //
151  // FIXME: There is a case that occurs in practice, like this:
152  // %9 = COPY %f1; VSSRC:%9
153  // ...
154  // %6 = COPY %9; VSSRC:%6,%9
155  // %7 = COPY %9; VSSRC:%7,%9
156  // %9<def,tied1> = XSMADDASP %9<tied0>, %1, %4; VSSRC:
157  // %6<def,tied1> = XSMADDASP %6<tied0>, %1, %2; VSSRC:
158  // %7<def,tied1> = XSMADDASP %7<tied0>, %1, %3; VSSRC:
159  // which prevents an otherwise-profitable transformation.
160  bool OtherUsers = false, KillsAddendSrc = false;
161  for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
162  J != JE; --J) {
163  if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
164  OtherUsers = true;
165  break;
166  }
167  if (J->modifiesRegister(AddendSrcReg, TRI) ||
168  J->killsRegister(AddendSrcReg, TRI)) {
169  KillsAddendSrc = true;
170  break;
171  }
172  }
173 
174  if (OtherUsers || KillsAddendSrc)
175  continue;
176 
177 
178  // The transformation doesn't work well with things like:
179  // %5 = A-form-op %5, %11, %5;
180  // unless %11 is also a kill, so skip when it is not,
181  // and check operand 3 to see it is also a kill to handle the case:
182  // %5 = A-form-op %5, %5, %11;
183  // where %5 and %11 are both kills. This case would be skipped
184  // otherwise.
185  Register OldFMAReg = MI.getOperand(0).getReg();
186 
187  // Find one of the product operands that is killed by this instruction.
188  unsigned KilledProdOp = 0, OtherProdOp = 0;
189  Register Reg2 = MI.getOperand(2).getReg();
190  Register Reg3 = MI.getOperand(3).getReg();
191  if (LIS->getInterval(Reg2).Query(FMAIdx).isKill()
192  && Reg2 != OldFMAReg) {
193  KilledProdOp = 2;
194  OtherProdOp = 3;
195  } else if (LIS->getInterval(Reg3).Query(FMAIdx).isKill()
196  && Reg3 != OldFMAReg) {
197  KilledProdOp = 3;
198  OtherProdOp = 2;
199  }
200 
201  // If there are no usable killed product operands, then this
202  // transformation is likely not profitable.
203  if (!KilledProdOp)
204  continue;
205 
206  // If the addend copy is used only by this MI, then the addend source
207  // register is likely not live here. This could be fixed (based on the
208  // legality checks above, the live range for the addend source register
209  // could be extended), but it seems likely that such a trivial copy can
210  // be coalesced away later, and thus is not worth the effort.
211  if (Register::isVirtualRegister(AddendSrcReg) &&
212  !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
213  continue;
214 
215  // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
216 
217  Register KilledProdReg = MI.getOperand(KilledProdOp).getReg();
218  Register OtherProdReg = MI.getOperand(OtherProdOp).getReg();
219 
220  unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
221  unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg();
222  unsigned OtherProdSubReg = MI.getOperand(OtherProdOp).getSubReg();
223 
224  bool AddRegKill = AddendMI->getOperand(1).isKill();
225  bool KilledProdRegKill = MI.getOperand(KilledProdOp).isKill();
226  bool OtherProdRegKill = MI.getOperand(OtherProdOp).isKill();
227 
228  bool AddRegUndef = AddendMI->getOperand(1).isUndef();
229  bool KilledProdRegUndef = MI.getOperand(KilledProdOp).isUndef();
230  bool OtherProdRegUndef = MI.getOperand(OtherProdOp).isUndef();
231 
232  // If there isn't a class that fits, we can't perform the transform.
233  // This is needed for correctness with a mixture of VSX and Altivec
234  // instructions to make sure that a low VSX register is not assigned to
235  // the Altivec instruction.
236  if (!MRI.constrainRegClass(KilledProdReg,
237  MRI.getRegClass(OldFMAReg)))
238  continue;
239 
240  assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
241  "Addend copy not tied to old FMA output!");
242 
243  LLVM_DEBUG(dbgs() << "VSX FMA Mutation:\n " << MI);
244 
245  MI.getOperand(0).setReg(KilledProdReg);
246  MI.getOperand(1).setReg(KilledProdReg);
247  MI.getOperand(3).setReg(AddendSrcReg);
248 
249  MI.getOperand(0).setSubReg(KilledProdSubReg);
250  MI.getOperand(1).setSubReg(KilledProdSubReg);
251  MI.getOperand(3).setSubReg(AddSubReg);
252 
253  MI.getOperand(1).setIsKill(KilledProdRegKill);
254  MI.getOperand(3).setIsKill(AddRegKill);
255 
256  MI.getOperand(1).setIsUndef(KilledProdRegUndef);
257  MI.getOperand(3).setIsUndef(AddRegUndef);
258 
259  MI.setDesc(TII->get(AltOpc));
260 
261  // If the addend is also a multiplicand, replace it with the addend
262  // source in both places.
263  if (OtherProdReg == AddendMI->getOperand(0).getReg()) {
264  MI.getOperand(2).setReg(AddendSrcReg);
265  MI.getOperand(2).setSubReg(AddSubReg);
266  MI.getOperand(2).setIsKill(AddRegKill);
267  MI.getOperand(2).setIsUndef(AddRegUndef);
268  } else {
269  MI.getOperand(2).setReg(OtherProdReg);
270  MI.getOperand(2).setSubReg(OtherProdSubReg);
271  MI.getOperand(2).setIsKill(OtherProdRegKill);
272  MI.getOperand(2).setIsUndef(OtherProdRegUndef);
273  }
274 
275  LLVM_DEBUG(dbgs() << " -> " << MI);
276 
277  // The killed product operand was killed here, so we can reuse it now
278  // for the result of the fma.
279 
280  LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
281  VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
282  for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
283  UI != UE;) {
284  MachineOperand &UseMO = *UI;
285  MachineInstr *UseMI = UseMO.getParent();
286  ++UI;
287 
288  // Don't replace the result register of the copy we're about to erase.
289  if (UseMI == AddendMI)
290  continue;
291 
292  UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
293  }
294 
295  // Extend the live intervals of the killed product operand to hold the
296  // fma result.
297 
298  LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
299  for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end();
300  AI != AE; ++AI) {
301  // Don't add the segment that corresponds to the original copy.
302  if (AI->valno == AddendValNo)
303  continue;
304 
305  VNInfo *NewFMAValNo =
306  NewFMAInt.getNextValue(AI->start,
307  LIS->getVNInfoAllocator());
308 
309  NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end,
310  NewFMAValNo));
311  }
312  LLVM_DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
313 
314  // Extend the live interval of the addend source (it might end at the
315  // copy to be removed, or somewhere in between there and here). This
316  // is necessary only if it is a physical register.
317  if (!Register::isVirtualRegister(AddendSrcReg))
318  for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid();
319  ++Units) {
320  unsigned Unit = *Units;
321 
322  LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
323  AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
324  FMAIdx.getRegSlot());
325  LLVM_DEBUG(dbgs() << " extended: " << AddendSrcRange << '\n');
326  }
327 
328  FMAInt.removeValNo(FMAValNo);
329  LLVM_DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
330 
331  // Remove the (now unused) copy.
332 
333  LLVM_DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
334  LIS->RemoveMachineInstrFromMaps(*AddendMI);
335  AddendMI->eraseFromParent();
336 
337  Changed = true;
338  }
339 
340  return Changed;
341  }
342 
343 public:
344  bool runOnMachineFunction(MachineFunction &MF) override {
345  if (skipFunction(MF.getFunction()))
346  return false;
347 
348  // If we don't have VSX then go ahead and return without doing
349  // anything.
350  const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
351  if (!STI.hasVSX())
352  return false;
353 
354  LIS = &getAnalysis<LiveIntervals>();
355 
356  TII = STI.getInstrInfo();
357 
358  bool Changed = false;
359 
361  return Changed;
362 
363  for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
364  MachineBasicBlock &B = *I++;
365  if (processBlock(B))
366  Changed = true;
367  }
368 
369  return Changed;
370  }
371 
372  void getAnalysisUsage(AnalysisUsage &AU) const override {
375  AU.addRequired<SlotIndexes>();
380  }
381  };
382 }
383 
384 INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
385  "PowerPC VSX FMA Mutation", false, false)
390  "PowerPC VSX FMA Mutation", false, false)
391 
392 char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
393 
394 char PPCVSXFMAMutate::ID = 0;
396  return new PPCVSXFMAMutate();
397 }
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:60
void RemoveMachineInstrFromMaps(MachineInstr &MI)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Segments::iterator iterator
Definition: LiveInterval.h:211
FunctionPass * createPPCVSXFMAMutatePass()
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:679
void setIsUndef(bool Val=true)
unsigned getSubReg() const
bool hasVSX() const
Definition: PPCSubtarget.h:252
void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
unsigned const TargetRegisterInfo * TRI
char & PPCVSXFMAMutateID
VNInfo - Value Number Information.
Definition: LiveInterval.h:52
return AArch64::GPR64RegClass contains(Reg)
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:156
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
iterator end()
Definition: LiveInterval.h:215
VNInfo::Allocator & getVNInfoAllocator()
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:410
PowerPC VSX FMA Mutation
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool isFullCopy() const
SlotIndexes pass.
Definition: SlotIndexes.h:314
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def...
Definition: SlotIndexes.h:254
iterator addSegment(Segment S)
Add the specified Segment to this range, merging segments as appropriate.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isKill() const
Return true if the live-in value is killed by this instruction.
Definition: LiveInterval.h:111
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:532
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:412
void removeValNo(VNInfo *ValNo)
removeValNo - Remove all the segments defined by the specified value#.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
MachineInstrBundleIterator< MachineInstr > iterator
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
int getAltVSXFMAOpcode(uint16_t Opcode)
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:392
MachineInstrBuilder & UseMI
void initializePPCVSXFMAMutatePass(PassRegistry &)
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
LiveInterval & getInterval(Register Reg)
#define DEBUG_TYPE
SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const
Return the first index in the given basic block.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void setIsKill(bool Val=true)
Iterator for intrusive lists based on ilist_node.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:184
MachineOperand class - Representation of each machine instruction operand.
const Function & getFunction() const
Return the LLVM function that this machine code represents.
static cl::opt< bool > DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation", cl::desc("Disable VSX FMA instruction mutation"), cl::init(true), cl::Hidden)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:255
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
reg_nodbg_iterator reg_nodbg_begin(Register RegNo) const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
#define I(x, y, z)
Definition: MD5.cpp:58
void setSubReg(unsigned subReg)
static reg_nodbg_iterator reg_nodbg_end()
iterator begin()
Definition: LiveInterval.h:214
VNInfo * getNextValue(SlotIndex def, VNInfo::Allocator &VNInfoAllocator)
getNextValue - Create a new value number and return it.
Definition: LiveInterval.h:322
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::pair< VNInfo *, bool > extendInBlock(ArrayRef< SlotIndex > Undefs, SlotIndex StartIdx, SlotIndex Kill)
Attempt to extend a value defined after StartIdx to include Use.
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:334
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
IRTranslator LLVM IR MI
Register getReg() const
getReg - Returns the register number.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
const PPCRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: PPCInstrInfo.h:185
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE, "PowerPC VSX FMA Mutation", false, false) INITIALIZE_PASS_END(PPCVSXFMAMutate