LLVM  13.0.0git
R600EmitClauseMarkers.cpp
Go to the documentation of this file.
1 //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold
11 /// 128 Alu instructions ; these instructions can access up to 4 prefetched
12 /// 4 lines of 16 registers from constant buffers. Such ALU clauses are
13 /// initiated by CF_ALU instructions.
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
18 #include "R600Defines.h"
19 #include "R600Subtarget.h"
20 
21 using namespace llvm;
22 
23 namespace llvm {
24 
26 
27 } // end namespace llvm
28 
29 namespace {
30 
31 class R600EmitClauseMarkers : public MachineFunctionPass {
32 private:
33  const R600InstrInfo *TII = nullptr;
34  int Address = 0;
35 
36  unsigned OccupiedDwords(MachineInstr &MI) const {
37  switch (MI.getOpcode()) {
38  case R600::INTERP_PAIR_XY:
39  case R600::INTERP_PAIR_ZW:
40  case R600::INTERP_VEC_LOAD:
41  case R600::DOT_4:
42  return 4;
43  case R600::KILL:
44  return 0;
45  default:
46  break;
47  }
48 
49  // These will be expanded to two ALU instructions in the
50  // ExpandSpecialInstructions pass.
51  if (TII->isLDSRetInstr(MI.getOpcode()))
52  return 2;
53 
54  if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) ||
55  TII->isReductionOp(MI.getOpcode()))
56  return 4;
57 
58  unsigned NumLiteral = 0;
59  for (MachineInstr::mop_iterator It = MI.operands_begin(),
60  E = MI.operands_end();
61  It != E; ++It) {
62  MachineOperand &MO = *It;
63  if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
64  ++NumLiteral;
65  }
66  return 1 + NumLiteral;
67  }
68 
69  bool isALU(const MachineInstr &MI) const {
70  if (TII->isALUInstr(MI.getOpcode()))
71  return true;
72  if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
73  return true;
74  switch (MI.getOpcode()) {
75  case R600::PRED_X:
76  case R600::INTERP_PAIR_XY:
77  case R600::INTERP_PAIR_ZW:
78  case R600::INTERP_VEC_LOAD:
79  case R600::COPY:
80  case R600::DOT_4:
81  return true;
82  default:
83  return false;
84  }
85  }
86 
87  bool IsTrivialInst(MachineInstr &MI) const {
88  switch (MI.getOpcode()) {
89  case R600::KILL:
90  case R600::RETURN:
91  case R600::IMPLICIT_DEF:
92  return true;
93  default:
94  return false;
95  }
96  }
97 
98  std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
99  // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
100  // (See also R600ISelLowering.cpp)
101  // ConstIndex value is in [0, 4095];
102  return std::pair<unsigned, unsigned>(
103  ((Sel >> 2) - 512) >> 12, // KC_BANK
104  // Line Number of ConstIndex
105  // A line contains 16 constant registers however KCX bank can lock
106  // two line at the same time ; thus we want to get an even line number.
107  // Line number can be retrieved with (>>4), using (>>5) <<1 generates
108  // an even number.
109  ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
110  }
111 
112  bool
113  SubstituteKCacheBank(MachineInstr &MI,
114  std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
115  bool UpdateInstr = true) const {
116  std::vector<std::pair<unsigned, unsigned>> UsedKCache;
117 
118  if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)
119  return true;
120 
122  TII->getSrcs(MI);
123  assert(
124  (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
125  "Can't assign Const");
126  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
127  if (Consts[i].first->getReg() != R600::ALU_CONST)
128  continue;
129  unsigned Sel = Consts[i].second;
130  unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
131  unsigned KCacheIndex = Index * 4 + Chan;
132  const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
133  if (CachedConsts.empty()) {
134  CachedConsts.push_back(BankLine);
135  UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
136  continue;
137  }
138  if (CachedConsts[0] == BankLine) {
139  UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
140  continue;
141  }
142  if (CachedConsts.size() == 1) {
143  CachedConsts.push_back(BankLine);
144  UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
145  continue;
146  }
147  if (CachedConsts[1] == BankLine) {
148  UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
149  continue;
150  }
151  return false;
152  }
153 
154  if (!UpdateInstr)
155  return true;
156 
157  for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
158  if (Consts[i].first->getReg() != R600::ALU_CONST)
159  continue;
160  switch(UsedKCache[j].first) {
161  case 0:
162  Consts[i].first->setReg(
163  R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));
164  break;
165  case 1:
166  Consts[i].first->setReg(
167  R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));
168  break;
169  default:
170  llvm_unreachable("Wrong Cache Line");
171  }
172  j++;
173  }
174  return true;
175  }
176 
177  bool canClauseLocalKillFitInClause(
178  unsigned AluInstCount,
179  std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
182  const R600RegisterInfo &TRI = TII->getRegisterInfo();
183  //TODO: change this to defs?
185  MOI = Def->operands_begin(),
186  MOE = Def->operands_end(); MOI != MOE; ++MOI) {
187  if (!MOI->isReg() || !MOI->isDef() ||
188  TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
189  continue;
190 
191  // Def defines a clause local register, so check that its use will fit
192  // in the clause.
193  unsigned LastUseCount = 0;
194  for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
195  AluInstCount += OccupiedDwords(*UseI);
196  // Make sure we won't need to end the clause due to KCache limitations.
197  if (!SubstituteKCacheBank(*UseI, KCacheBanks, false))
198  return false;
199 
200  // We have reached the maximum instruction limit before finding the
201  // use that kills this register, so we cannot use this def in the
202  // current clause.
203  if (AluInstCount >= TII->getMaxAlusPerClause())
204  return false;
205 
206  // TODO: Is this true? kill flag appears to work OK below
207  // Register kill flags have been cleared by the time we get to this
208  // pass, but it is safe to assume that all uses of this register
209  // occur in the same basic block as its definition, because
210  // it is illegal for the scheduler to schedule them in
211  // different blocks.
212  if (UseI->readsRegister(MOI->getReg(), &TRI))
213  LastUseCount = AluInstCount;
214 
215  // Exit early if the current use kills the register
216  if (UseI != Def && UseI->killsRegister(MOI->getReg(), &TRI))
217  break;
218  }
219  if (LastUseCount)
220  return LastUseCount <= TII->getMaxAlusPerClause();
221  llvm_unreachable("Clause local register live at end of clause.");
222  }
223  return true;
224  }
225 
228  MachineBasicBlock::iterator ClauseHead = I;
229  std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
230  bool PushBeforeModifier = false;
231  unsigned AluInstCount = 0;
232  for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
233  if (IsTrivialInst(*I))
234  continue;
235  if (!isALU(*I))
236  break;
237  if (AluInstCount > TII->getMaxAlusPerClause())
238  break;
239  if (I->getOpcode() == R600::PRED_X) {
240  // We put PRED_X in its own clause to ensure that ifcvt won't create
241  // clauses with more than 128 insts.
242  // IfCvt is indeed checking that "then" and "else" branches of an if
243  // statement have less than ~60 insts thus converted clauses can't be
244  // bigger than ~121 insts (predicate setter needs to be in the same
245  // clause as predicated alus).
246  if (AluInstCount > 0)
247  break;
248  if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH)
249  PushBeforeModifier = true;
250  AluInstCount ++;
251  continue;
252  }
253  // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
254  //
255  // * KILL or INTERP instructions
256  // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
257  // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
258  //
259  // XXX: These checks have not been implemented yet.
260  if (TII->mustBeLastInClause(I->getOpcode())) {
261  I++;
262  break;
263  }
264 
265  // If this instruction defines a clause local register, make sure
266  // its use can fit in this clause.
267  if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
268  break;
269 
270  if (!SubstituteKCacheBank(*I, KCacheBanks))
271  break;
272  AluInstCount += OccupiedDwords(*I);
273  }
274  unsigned Opcode = PushBeforeModifier ?
275  R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;
276  BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
277  // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
278  // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
279  // pass may assume that identical ALU clause starter at the beginning of a
280  // true and false branch can be factorized which is not the case.
281  .addImm(Address++) // ADDR
282  .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
283  .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
284  .addImm(KCacheBanks.empty()?0:2) // KM0
285  .addImm((KCacheBanks.size() < 2)?0:2) // KM1
286  .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
287  .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
288  .addImm(AluInstCount) // COUNT
289  .addImm(1); // Enabled
290  return I;
291  }
292 
293 public:
294  static char ID;
295 
296  R600EmitClauseMarkers() : MachineFunctionPass(ID) {
298  }
299 
300  bool runOnMachineFunction(MachineFunction &MF) override {
302  TII = ST.getInstrInfo();
303 
304  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
305  BB != BB_E; ++BB) {
308  if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
309  continue; // BB was already parsed
310  for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
311  if (isALU(*I)) {
312  auto next = MakeALUClause(MBB, I);
313  assert(next != I);
314  I = next;
315  } else
316  ++I;
317  }
318  }
319  return false;
320  }
321 
322  StringRef getPassName() const override {
323  return "R600 Emit Clause Markers Pass";
324  }
325 };
326 
328 
329 } // end anonymous namespace
330 
331 INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
332  "R600 Emit Clause Markters", false, false)
333 INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
334  "R600 Emit Clause Markters", false, false)
335 
337  return new R600EmitClauseMarkers();
338 }
i
i
Definition: README.txt:29
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
Markters
R600 Emit Clause Markters
Definition: R600EmitClauseMarkers.cpp:334
llvm::MachineFunction::end
iterator end()
Definition: MachineFunction.h:742
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::MachineBasicBlock::findDebugLoc
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions.
Definition: MachineBasicBlock.cpp:1398
llvm::NVPTXISD::RETURN
@ RETURN
Definition: NVPTXISelLowering.h:49
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::R600RegisterInfo
Definition: R600RegisterInfo.h:22
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
emitclausemarkers
emitclausemarkers
Definition: R600EmitClauseMarkers.cpp:333
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
MO_FLAG_PUSH
#define MO_FLAG_PUSH
Definition: R600Defines.h:18
llvm::createR600EmitClauseMarkers
FunctionPass * createR600EmitClauseMarkers()
Definition: R600EmitClauseMarkers.cpp:336
false
Definition: StackSlotColoring.cpp:142
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::MachineFunction::begin
iterator begin()
Definition: MachineFunction.h:740
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:558
llvm::Clause
Definition: DirectiveEmitter.h:123
llvm::R600Subtarget
Definition: R600Subtarget.h:36
AMDGPUMCTargetDesc.h
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::HighlightColor::Address
@ Address
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers", "R600 Emit Clause Markters", false, false) INITIALIZE_PASS_END(R600EmitClauseMarkers
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
AMDGPU.h
llvm::initializeR600EmitClauseMarkersPass
void initializeR600EmitClauseMarkersPass(PassRegistry &)
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
j
return j(j<< 16)
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
R600Subtarget.h
llvm::R600InstrInfo
Definition: R600InstrInfo.h:39
R600Defines.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38