SIModeRegister.cpp
1 //===-- SIModeRegister.cpp - Mode Register --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass inserts changes to the Mode register settings as required.
10 /// Note that currently it only deals with the Double Precision Floating Point
11 /// rounding mode setting, but is intended to be generic enough to be easily
12 /// expanded.
13 ///
14 //===----------------------------------------------------------------------===//
15 //
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "llvm/ADT/Statistic.h"
20 #include <queue>
21 
22 #define DEBUG_TYPE "si-mode-register"
23 
24 STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
25 
26 using namespace llvm;
27 
28 struct Status {
29  // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
30  // known value
31  unsigned Mask;
32  unsigned Mode;
33 
34  Status() : Mask(0), Mode(0){};
35 
36  Status(unsigned NewMask, unsigned NewMode) : Mask(NewMask), Mode(NewMode) {
37  Mode &= Mask;
38  };
39 
40  // merge two status values such that only values that don't conflict are
41  // preserved
42  Status merge(const Status &S) const {
43  return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
44  }
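  // Worked example (illustrative): merging A = Status(0b1100, 0b0100) with
  // B = Status(0b0011, 0b0001) gives Status(0b1111, 0b0101); bits known in
  // either input stay known, and B's value wins wherever both inputs know a bit.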
45 
46  // merge an unknown value by using the unknown value's mask to remove bits
47  // from the result
48  Status mergeUnknown(unsigned newMask) {
49  return Status(Mask & ~newMask, Mode & ~newMask);
50  }
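  // Worked example (illustrative): if A = Status(0b1111, 0b0101) and bits 0b0011
  // become unknown, A.mergeUnknown(0b0011) == Status(0b1100, 0b0100); the
  // newly-unknown bits are cleared from both Mask and Mode.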
51 
52  // intersect two Status values to produce a mode and mask that is a subset
53  // of both values
54  Status intersect(const Status &S) const {
55  unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
56  unsigned NewMode = (Mode & NewMask);
57  return Status(NewMask, NewMode);
58  }
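  // Worked example (illustrative): for A = Status(0b1111, 0b0101) and
  // B = Status(0b0111, 0b0001), A.intersect(B) == Status(0b0011, 0b0001); only
  // bits that are known in both inputs and agree in value remain known.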
59 
60  // produce the delta required to change the Mode to the required Mode
61  Status delta(const Status &S) const {
62  return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
63  }
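  // Worked example (illustrative): with a current state of Status(0b0011, 0b0001)
  // and a requirement S = Status(0b0111, 0b0011), delta(S) == Status(0b0110, 0b0010);
  // bit 0 already holds the required value, while bit 1 is wrong and bit 2 is
  // unknown, so only those two bits need to be written.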
64 
65  bool operator==(const Status &S) const {
66  return (Mask == S.Mask) && (Mode == S.Mode);
67  }
68 
69  bool operator!=(const Status &S) const { return !(*this == S); }
70 
71  bool isCompatible(Status &S) {
72  return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
73  }
74 
75  bool isCombinable(Status &S) { return !(Mask & S.Mask) || isCompatible(S); }
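  // Worked examples (illustrative): with A = Status(0b1100, 0b0100),
  // A.isCompatible(Status(0b0100, 0b0100)) is true (the required bit is known and
  // correct), A.isCompatible(Status(0b0110, 0b0100)) is false (bit 1 is unknown
  // in A), and A.isCombinable(Status(0b0011, 0b0001)) is true because the two
  // masks are disjoint.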
76 };
77 
78 class BlockData {
79 public:
80  // The Status that represents the mode register settings required by the
81  // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
82  Status Require;
83 
84  // The Status that represents the net changes to the Mode register made by
85  // this block. Calculated in Phase 1.
86  Status Change;
87 
88  // The Status that represents the mode register settings on exit from this
89  // block. Calculated in Phase 2.
90  Status Exit;
91 
92  // The Status that represents the intersection of exit Mode register settings
93  // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
94  Status Pred;
95 
96  // In Phase 1 we record the first instruction that has a mode requirement,
97  // which is used in Phase 3 if we need to insert a mode change.
98  MachineInstr *FirstInsertionPoint;
99 
100  // A flag to indicate whether an Exit value has been set (we can't tell by
101  // examining the Exit value itself as all values may be valid results).
102  bool ExitSet;
103 
104  BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){};
105 };
106 
107 namespace {
108 
109 class SIModeRegister : public MachineFunctionPass {
110 public:
111  static char ID;
112 
113  std::vector<std::unique_ptr<BlockData>> BlockInfo;
114  std::queue<MachineBasicBlock *> Phase2List;
115 
116  // The default mode register setting currently only caters for the floating
117  // point double precision rounding mode.
118  // We currently assume the default rounding mode is Round to Nearest
119  // NOTE: this should come from a per function rounding mode setting once such
120  // a setting exists.
121  unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST;
122  Status DefaultStatus =
123  Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
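  // Illustrative note, assuming the SIDefines.h encoding in which
  // FP_ROUND_MODE_DP(x) places x in bits 3:2 of the MODE register: DefaultStatus
  // is then Status(Mask = 0b1100, Mode = 0b0000), i.e. the double precision
  // rounding field is known and set to round-to-nearest.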
124 
125  bool Changed = false;
126 
127 public:
128  SIModeRegister() : MachineFunctionPass(ID) {}
129 
130  bool runOnMachineFunction(MachineFunction &MF) override;
131 
132  void getAnalysisUsage(AnalysisUsage &AU) const override {
133  AU.setPreservesCFG();
134  MachineFunctionPass::getAnalysisUsage(AU);
135  }
136 
137  void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
138 
139  void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
140 
141  void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
142 
143  Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
144 
145  void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
146  const SIInstrInfo *TII, Status InstrMode);
147 };
148 } // End anonymous namespace.
149 
150 INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,
151  "Insert required mode register values", false, false)
152 
153 char SIModeRegister::ID = 0;
154 
155 char &llvm::SIModeRegisterID = SIModeRegister::ID;
156 
157 FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
158 
159 // Determine the Mode register setting required for this instruction.
160 // Instructions which don't use the Mode register return a null Status.
161 // Note this currently only deals with instructions that use the floating point
162 // double precision setting.
163 Status SIModeRegister::getInstructionMode(MachineInstr &MI,
164  const SIInstrInfo *TII) {
165  if (TII->usesFPDPRounding(MI)) {
166  switch (MI.getOpcode()) {
167  case AMDGPU::V_INTERP_P1LL_F16:
168  case AMDGPU::V_INTERP_P1LV_F16:
169  case AMDGPU::V_INTERP_P2_F16:
170  // f16 interpolation instructions need double precision round to zero
171  return Status(FP_ROUND_MODE_DP(3),
172  FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
173  default:
174  return DefaultStatus;
175  }
176  }
177  return Status();
178 }
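// Illustrative note: for the f16 interpolation opcodes above the returned Status
// is (Mask = FP_ROUND_MODE_DP(0x3), Mode = FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO)),
// i.e. the double precision rounding field must be round-to-zero; every other
// instruction that uses FPDP rounding simply requires DefaultStatus.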
179 
180 // Insert a setreg instruction to update the Mode register.
181 // It is possible (though unlikely) for an instruction to require a change to
182 // the value of disjoint parts of the Mode register when we don't know the
183 // value of the intervening bits. In that case we need to use more than one
184 // setreg instruction.
185 void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
186  const SIInstrInfo *TII, Status InstrMode) {
187  while (InstrMode.Mask) {
188  unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
189  unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
190  unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
191  BuildMI(MBB, MI, 0, TII->get(AMDGPU::S_SETREG_IMM32_B32))
192  .addImm(Value)
193  .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) |
194  (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
195  (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
196  ++NumSetregInserted;
197  Changed = true;
198  InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
199  }
200 }
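// Illustrative example (the assembly spelling below is approximate): a delta of
// Status(Mask = 0b1100, Mode = 0b1100) yields Offset = 2, Width = 2, Value = 0x3
// and a single instruction along the lines of
//   s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0x3
// A mask with two disjoint runs of known bits, e.g. 0b110011, would produce two
// setreg instructions, one per contiguous run.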
201 
202 // In Phase 1 we iterate through the instructions of the block and for each
203 // instruction we get its mode usage. If the instruction uses the Mode register
204 // we:
205 // - update the Change status, which tracks the changes to the Mode register
206 // made by this block
207 // - if this instruction's requirements are compatible with the current setting
208 // of the Mode register we merge the modes
209 // - if it isn't compatible and an InsertionPoint isn't set, then we set the
210 // InsertionPoint to the current instruction, and we remember the current
211 // mode
212 // - if it isn't compatible and InsertionPoint is set we insert a setreg before
213 // that instruction (unless this instruction forms part of the block's
214 // entry requirements in which case the insertion is deferred until Phase 3
215 // when predecessor exit values are known), and move the insertion point to
216 // this instruction
217 // - if this is a setreg instruction we treat it as an incompatible instruction.
218 // This is sub-optimal but avoids some nasty corner cases, and is expected to
219 // occur very rarely.
220 // - on exit we have set the Require, Change, and initial Exit modes.
221 void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
222  const SIInstrInfo *TII) {
223  auto NewInfo = std::make_unique<BlockData>();
224  MachineInstr *InsertionPoint = nullptr;
225  // RequirePending is used to indicate whether we are collecting the initial
226  // requirements for the block, and need to defer the first InsertionPoint to
227  // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
228  // we discover an explict setreg that means this block doesn't have any
229  // initial requirements.
230  bool RequirePending = true;
231  Status IPChange;
232  for (MachineInstr &MI : MBB) {
233  Status InstrMode = getInstructionMode(MI, TII);
234  if (MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
235  MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
236  MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
237  MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
238  // We preserve any explicit mode register setreg instruction we encounter,
239  // as we assume it has been inserted by a higher authority (this is
240  // likely to be a very rare occurrence).
241  unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
242  if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) !=
243  AMDGPU::Hwreg::ID_MODE)
244  continue;
245 
246  unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
247  AMDGPU::Hwreg::WIDTH_M1_SHIFT_) +
248  1;
249  unsigned Offset =
250  (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
251  unsigned Mask = ((1 << Width) - 1) << Offset;
252 
253  // If an InsertionPoint is set we will insert a setreg there.
254  if (InsertionPoint) {
255  insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
256  InsertionPoint = nullptr;
257  }
258  // If this is an immediate then we know the value being set, but if it is
259  // not an immediate then we treat the modified bits of the mode register
260  // as unknown.
261  if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
262  MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
263  unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
264  unsigned Mode = (Val << Offset) & Mask;
265  Status Setreg = Status(Mask, Mode);
266  // If we haven't already set the initial requirements for the block we
267  // don't need to as the requirements start from this explicit setreg.
268  RequirePending = false;
269  NewInfo->Change = NewInfo->Change.merge(Setreg);
270  } else {
271  NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
272  }
273  } else if (!NewInfo->Change.isCompatible(InstrMode)) {
274  // This instruction uses the Mode register and its requirements aren't
275  // compatible with the current mode.
276  if (InsertionPoint) {
277  // If the required mode change cannot be included in the current
278  // InsertionPoint changes, we need a setreg and start a new
279  // InsertionPoint.
280  if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) {
281  if (RequirePending) {
282  // This is the first insertionPoint in the block so we will defer
283  // the insertion of the setreg to Phase 3 where we know whether or
284  // not it is actually needed.
285  NewInfo->FirstInsertionPoint = InsertionPoint;
286  NewInfo->Require = NewInfo->Change;
287  RequirePending = false;
288  } else {
289  insertSetreg(MBB, InsertionPoint, TII,
290  IPChange.delta(NewInfo->Change));
291  IPChange = NewInfo->Change;
292  }
293  // Set the new InsertionPoint
294  InsertionPoint = &MI;
295  }
296  NewInfo->Change = NewInfo->Change.merge(InstrMode);
297  } else {
298  // No InsertionPoint is currently set - this is either the first in
299  // the block or we have previously seen an explicit setreg.
300  InsertionPoint = &MI;
301  IPChange = NewInfo->Change;
302  NewInfo->Change = NewInfo->Change.merge(InstrMode);
303  }
304  }
305  }
306  if (RequirePending) {
307  // If we haven't yet set the initial requirements for the block we set them
308  // now.
309  NewInfo->FirstInsertionPoint = InsertionPoint;
310  NewInfo->Require = NewInfo->Change;
311  } else if (InsertionPoint) {
312  // We need to insert a setreg at the InsertionPoint
313  insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
314  }
315  NewInfo->Exit = NewInfo->Change;
316  BlockInfo[MBB.getNumber()] = std::move(NewInfo);
317 }
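// Illustrative walk-through of Phase 1: for a block containing an ordinary
// FPDP-rounding instruction followed by V_INTERP_P2_F16, the first instruction
// establishes Require = DefaultStatus (any setreg it needs is deferred to Phase
// 3), a setreg switching the double precision rounding field to round-to-zero is
// inserted immediately before the interpolation instruction, and Exit records the
// round-to-zero setting. If the predecessors already provide the default mode,
// Phase 3 adds nothing at the top of the block.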
318 
319 // In Phase 2 we revisit each block and calculate the common Mode register
320 // value provided by all predecessor blocks. If the Exit value for the block
321 // is changed, then we add the successor blocks to the worklist so that the
322 // exit value is propagated.
323 void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
324  const SIInstrInfo *TII) {
325  bool RevisitRequired = false;
326  bool ExitSet = false;
327  unsigned ThisBlock = MBB.getNumber();
328  if (MBB.pred_empty()) {
329  // There are no predecessors, so use the default starting status.
330  BlockInfo[ThisBlock]->Pred = DefaultStatus;
331  ExitSet = true;
332  } else {
333  // Build a status that is common to all the predecessors by intersecting
334  // all the predecessor exit status values.
335  // Mask bits (which represent the Mode bits with a known value) can only be
336  // added by explicit SETREG instructions or the initial default value -
337  // the intersection process may remove Mask bits.
338  // If we find a predecessor that has not yet had an exit value determined
339  // (this can happen for example if a block is its own predecessor) we defer
340  // use of that value as the Mask will be all zero, and we will revisit this
341  // block again later (unless the only predecessor without an exit value is
342  // this block).
343  MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
344  MachineBasicBlock &PB = *(*P);
345  unsigned PredBlock = PB.getNumber();
346  if ((ThisBlock == PredBlock) && (std::next(P) == E)) {
347  BlockInfo[ThisBlock]->Pred = DefaultStatus;
348  ExitSet = true;
349  } else if (BlockInfo[PredBlock]->ExitSet) {
350  BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
351  ExitSet = true;
352  } else if (PredBlock != ThisBlock)
353  RevisitRequired = true;
354 
355  for (P = std::next(P); P != E; P = std::next(P)) {
356  MachineBasicBlock *Pred = *P;
357  unsigned PredBlock = Pred->getNumber();
358  if (BlockInfo[PredBlock]->ExitSet) {
359  if (BlockInfo[ThisBlock]->ExitSet) {
360  BlockInfo[ThisBlock]->Pred =
361  BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
362  } else {
363  BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
364  }
365  ExitSet = true;
366  } else if (PredBlock != ThisBlock)
367  RevisitRequired = true;
368  }
369  }
370  Status TmpStatus =
371  BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
372  if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
373  BlockInfo[ThisBlock]->Exit = TmpStatus;
374  // Add the successors to the work list so we can propagate the changed exit
375  // status.
376  for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
377  E = MBB.succ_end();
378  S != E; S = std::next(S)) {
379  MachineBasicBlock &B = *(*S);
380  Phase2List.push(&B);
381  }
382  }
383  BlockInfo[ThisBlock]->ExitSet = ExitSet;
384  if (RevisitRequired)
385  Phase2List.push(&MBB);
386 }
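// Illustrative example: in a diamond CFG where one predecessor exits with the
// double precision rounding field set to round-to-zero and the other with it set
// to round-to-nearest, intersecting the two exit values leaves that field unknown
// in the join block's Pred, so any instruction there that needs a specific
// rounding mode will receive a fresh setreg in Phase 3.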
387 
388 // In Phase 3 we revisit each block and if it has an insertion point defined we
389 // check whether the predecessor mode meets the block's entry requirements. If
390 // not we insert an appropriate setreg instruction to modify the Mode register.
391 void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
392  const SIInstrInfo *TII) {
393  unsigned ThisBlock = MBB.getNumber();
394  if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
395  Status Delta =
396  BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
397  if (BlockInfo[ThisBlock]->FirstInsertionPoint)
398  insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
399  else
400  insertSetreg(MBB, &MBB.instr_front(), TII, Delta);
401  }
402 }
403 
404 bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
405  BlockInfo.resize(MF.getNumBlockIDs());
406  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
407  const SIInstrInfo *TII = ST.getInstrInfo();
408 
409  // Processing is performed in a number of phases
410 
411  // Phase 1 - determine the initial mode required by each block, and add setreg
412  // instructions for intra block requirements.
413  for (MachineBasicBlock &BB : MF)
414  processBlockPhase1(BB, TII);
415 
416  // Phase 2 - determine the exit mode from each block. We add all blocks to the
417  // list here, but will also add any that need to be revisited during Phase 2
418  // processing.
419  for (MachineBasicBlock &BB : MF)
420  Phase2List.push(&BB);
421  while (!Phase2List.empty()) {
422  processBlockPhase2(*Phase2List.front(), TII);
423  Phase2List.pop();
424  }
425 
426  // Phase 3 - add an initial setreg to each block where the required entry mode
427  // is not satisfied by the exit mode of all its predecessors.
428  for (MachineBasicBlock &BB : MF)
429  processBlockPhase3(BB, TII);
430 
431  BlockInfo.clear();
432 
433  return Changed;
434 }