LLVM 22.0.0git
X86FastPreTileConfig.cpp
//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to preconfig the shape of physical tile registers
/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
/// walks the instructions of each basic block in reverse order. All the tile
/// registers that live out of a basic block are spilled and reloaded before
/// their users. It also checks the dependency of the shapes to ensure that
/// each shape is defined before ldtilecfg.
//
//===----------------------------------------------------------------------===//

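// For illustration, on a straight-line block the pass roughly performs the
// rewrite below (virtual register names are invented for the example):
//
//   %r:gr16 = ...                        %r:gr16 = ...
//   %c:gr16 = ...                        %c:gr16 = ...
//   %t:tile = PTILEZEROV %r, %c   -->    PLDTILECFGV %stack.cfg
//   ...                                  %t:tile = PTILEZEROV %r, %c
//                                        ...
//
// i.e. a ldtilecfg is materialized after the shape definitions and before
// the first tile definition it configures.
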
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "fastpretileconfig"

STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");

namespace {

class X86FastPreTileConfig : public MachineFunctionPass {
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineBasicBlock *MBB = nullptr;
  int CfgSS = -1;
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for each tile virtual register that was determined to be
  /// live across blocks.
  BitVector MayLiveAcrossBlocks;

  int getStackSpaceFor(Register VirtReg);
  void InitializeTileConfigStackSpace();
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  void canonicalizePHIs(MachineBasicBlock &MBB);
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  void convertPHIs(MachineBasicBlock &MBB);
  bool configBasicBlock(MachineBasicBlock &MBB);

public:
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configure.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};

} // end anonymous namespace

char X86FastPreTileConfig::ID = 0;

INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)

static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  auto MBBEnd = MBB.end();
  if (B == MBBEnd)
    return true;

  MachineBasicBlock::const_iterator I = MBB.begin();
  for (; &*I != A && &*I != B; ++I)
    ;

  return &*I == A;
}
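
// NB: dominates() above is a simple linear scan from the top of the block,
// so it answers "does A appear at or before B within MBB?". It is only ever
// queried with positions inside a single block, never across the CFG.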

/// This allocates space for the specified virtual register to be held on the
/// stack.
int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
  // Find the location Reg would belong...
  int SS = StackSlotForVirtReg[VirtReg];
  // Already has space allocated?
  if (SS != -1)
    return SS;

  // Allocate a new stack object for this spill location...
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);
  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);

  // Assign the slot.
  StackSlotForVirtReg[VirtReg] = FrameIdx;
  return FrameIdx;
}

/// Returns false if \p VirtReg is known to not live out of the current config.
/// If \p VirtReg lives out of the current MBB, it must live out of the
/// current config.
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
  if (MayLiveAcrossBlocks.test(VirtReg.virtRegIndex()))
    return true;

  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(VirtReg.virtRegIndex());
      return true;
    }

    // The use and def are in the same MBB. If the tile register is
    // reconfigured, it is clobbered and we need to spill and reload the
    // tile register.
    if (CfgMI) {
      if (dominates(*MBB, *CfgMI, UseInst)) {
        MayLiveAcrossBlocks.set(VirtReg.virtRegIndex());
        return true;
      }
    }
  }

  return false;
}

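// Sketch of the same-block case the CfgMI check above catches:
//
//   %t = PTILEZEROV %r, %c
//   PLDTILECFGV %stack.cfg    <- CfgMI: reconfigures the physical tiles
//   ... use %t                <- %t is clobbered by the reconfiguration,
//                                so it must be spilled and reloaded
//
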
void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  if (ST->hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
        .addReg(Zmm);
  } else if (ST->hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
                      32)
        .addReg(Ymm);
  } else {
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
      .addImm(1);
}

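// The 64-byte config slot zeroed and palette-tagged above follows the layout
// from the Intel AMX documentation (a sketch for reference, not code used by
// this pass):
//
//   struct __tile_config {
//     uint8_t  palette_id;    // offset 0, written as 1 above
//     uint8_t  start_row;     // offset 1
//     uint8_t  reserved[14];  // offsets 2..15
//     uint16_t colsb[16];     // offsets 16..47, bytes per row of each tile
//     uint8_t  rows[16];      // offsets 48..63, rows of each tile
//   };
//
// The per-tile rows/colsb fields are filled in later, after register
// allocation, by the tile config rewriting.
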
/// Insert spill instruction for \p VirtReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
                                 Register VirtReg, bool Kill) {
  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
  int FI = getStackSpaceFor(VirtReg);
  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');

  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  // Don't need shape information for tile store, because it is adjacent to
  // the tile def instruction.
  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI,
                           Register());
  ++NumStores;

  // TODO: update DBG_VALUEs
}

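// storeRegToStackSlot() handles the tile register class itself; the emitted
// sequence is roughly (sketch only, exact opcodes are chosen by the target
// hook):
//
//   %stride:gr64_nosp = MOV64ri 64
//   tilestore %stack.FI, ..., %stride, ..., %t
//
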
/// Insert reload instruction for \p OrigReg before \p UseMI.
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
                                  Register OrigReg, MachineOperand *RowMO,
                                  MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // Fold copy to tileload
  // BB1:
  // spill src to s
  //
  // BB2:
  // t = copy src
  // -->
  // t = tileload (s)
  if (UseMI->isCopy())
    TileReg = UseMI->getOperand(0).getReg();
  else
    TileReg = MRI->createVirtualRegister(&RC);
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
  // information for reload.
  // tileloadd (%sp, %idx), %tmm
  unsigned Opc = X86::PTILELOADDV;
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // FIXME: MBB is not the parent of UseMI.
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
                                TII->get(X86::MOV64ri), StrideReg)
                            .addImm(64);
  NewMI = addFrameReference(
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
          .addReg(RowMO->getReg())
          .addReg(ColMO->getReg()),
      FI);
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  RowMO->setIsKill(false);
  ColMO->setIsKill(false);
  // Erase copy instruction after it is folded.
  if (UseMI->isCopy()) {
    UseMI->eraseFromParent();
  } else {
    // Replace the register in the user MI.
    for (auto &MO : UseMI->operands()) {
      if (MO.isReg() && MO.getReg() == OrigReg)
        MO.setReg(TileReg);
    }
  }

  ++NumLoads;
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
                    << printReg(TileReg, TRI) << '\n');
}

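// The reload therefore expands to a shape-carrying tile load (sketch):
//
//   %stride:gr64_nosp = MOV64ri 64
//   %t:tile = PTILELOADDV %row, %col, %stack.FI, ..., %stride, ...
//
// which is why TII->loadRegFromStackSlot() cannot be used here: it has no
// way to thread the row/column operands through.
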
static bool isTileRegister(MachineRegisterInfo *MRI, Register Reg) {
  if (Reg.isVirtual() &&
      (MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)) {
    return true;
  }

  if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
    return true;

  return false;
}

static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  // The instruction must have 3 operands: tile def, row, col.
  if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
    return false;
  MachineOperand &MO = MI.getOperand(0);

  if (!MO.isReg())
    return false;

  return isTileRegister(MRI, MO.getReg());
}

static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
  MachineInstr *MI = MRI->getVRegDef(TileReg);
  if (isTileDef(MRI, *MI)) {
    MachineOperand *RowMO = &MI->getOperand(1);
    MachineOperand *ColMO = &MI->getOperand(2);
    return ShapeT(RowMO, ColMO, MRI);
  } else if (MI->isCopy()) {
    TileReg = MI->getOperand(1).getReg();
    return getShape(MRI, TileReg);
  }

  // The def should not be a PHI node, because we walk the MBB in reverse
  // post order.
  assert(MI->isPHI() && "Unexpected PHI when get shape.");
  llvm_unreachable("Unexpected MI when get shape.");
}
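
// For example, given
//   %t0:tile = PTILEZEROV %r, %c
//   %t1:tile = COPY %t0
// getShape(MRI, %t1) follows the COPY to %t0 and returns the shape operands
// (%r, %c) of the underlying tile def.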

// BB0:
// spill t0 to s0
// BB1:
// spill t1 to s1
//
// BB2:
// t = phi [t0, bb0] [t1, bb1]
// -->
// row = phi [r0, bb0] [r1, bb1]
// col = phi [c0, bb0] [c1, bb1]
// s = phi [s0, bb0] [s1, bb1]
// t = tileload row, col, s
// The new instructions are inserted after the PHI nodes. The order of the
// original PHI nodes is not preserved.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
                                      MachineInstr &PHI) {
  // 1. Create instruction to get stack slot address of each incoming block.
  // 2. Create PHI node for the stack address.
  // 3. Create PHI node for shape. If one of the incoming shapes is an
  //    immediate, use the immediate and delete the PHI node.
  // 4. Create tileload instruction from the stack address.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of phi node and its row/column information.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the incoming tile register and its incoming MBB.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as live out, so that it will be spilled when we visit the
    // incoming MBB. Otherwise, since the PHI will be deleted, the spill
    // would be missed when visiting the incoming MBB.
    MayLiveAcrossBlocks.set(InTileReg.virtRegIndex());
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (auto It = VisitedPHIs.find(TileDefMI);
          It != VisitedPHIs.end()) { // circular phi reference
        //        def t1
        //       /      \
        //  def t2       t3 = phi(t1, t4) <------
        //       \      /                       |
        //        t4 = phi(t2, t3) --------------
        //
        // For each (row, column and stack address) append phi incoming value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        Register InRowReg = It->second.Row;
        Register InColReg = It->second.Col;
        Register InStackAddrReg = It->second.StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert PHI to tileload
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is converted to a tileload instruction. Get the stack
        // address from the tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operand of row/column phi instruction.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register lives out of its def BB, so it will be
      // spilled. Create an MI to get the spill stack slot address for the
      // tile register.
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }

  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}
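
// Net effect: the tile PHI is replaced by three scalar PHIs plus one
// shape-carrying tile load at the first non-PHI position (sketch):
//
//   %row  = PHI %r0, %bb0, %r1, %bb1
//   %col  = PHI %c0, %bb0, %c1, %bb1
//   %addr = PHI %s0, %bb0, %s1, %bb1
//   %stride:gr64_nosp = MOV64ri 64
//   %t:tile = PTILELOADDV %row, %col, %addr, ..., %stride, ...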

static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  MachineOperand &MO = MI.getOperand(0);
  if (MO.isReg() && MO.getReg().isVirtual() && isTileRegister(MRI, MO.getReg()))
    return true;
  return false;
}

void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;

  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  // Canonicalize the phi node first. One tile phi may depend on a previous
  // phi node. For the case below, we need to convert %t4.
  //
  // BB0:
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t3 BB0)
  // -->
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t2 BB0)
  //
  while (!PHIs.empty()) {
    MachineInstr *PHI = PHIs.pop_back_val();

    // Find the operand that is incoming from the same MBB and whose def is
    // also a phi node.
    MachineOperand *InMO = nullptr;
    MachineInstr *DefMI = nullptr;
    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
      Register InTileReg = PHI->getOperand(I).getReg();
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      DefMI = MRI->getVRegDef(InTileReg);
      if (InMBB != &MBB || !DefMI->isPHI())
        continue;

      InMO = &PHI->getOperand(I);
      break;
    }
    // If we can't find such an operand, do nothing.
    if (!InMO)
      continue;

    // The current phi node depends on a previous phi node. Break the
    // dependency.
    Register DefTileReg;
    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      if (InMBB != &MBB)
        continue;
      DefTileReg = DefMI->getOperand(I).getReg();
      InMO->setReg(DefTileReg);
      break;
    }
  }
}

void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  while (!PHIs.empty()) {
    MachineInstr *MI = PHIs.pop_back_val();
    VisitedPHIs.clear();
    convertPHI(&MBB, *MI);
  }
}

// PreTileConfig should configure the tile registers based on basic
// block.
bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  bool Change = false;
  MachineInstr *LastShapeMI = nullptr;
  MachineInstr *LastTileCfg = nullptr;
  bool HasUnconfigTile = false;

  auto Config = [&](MachineInstr &Before) {
    if (CfgSS == -1)
      CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
                                     ST->getTileConfigAlignment(), false);
    LastTileCfg = addFrameReference(
        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
    LastShapeMI = nullptr;
    Change = true;
  };
  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (isTileRegister(MRI, Reg))
        return true;
    }
    return false;
  };
  for (MachineInstr &MI : reverse(MBB)) {
    // We have transformed PHI nodes before configuring the BB.
    if (MI.isPHI())
      break;
    // Don't collect the shape of a used tile; the tile should be defined
    // before the tile use. Spill and reload would happen if there is only a
    // tile use after ldtilecfg, so the shape can be collected from the
    // reload. Take the code below for example: %t would be reloaded before
    // the tilestore.
    // call
    // ....
    // tilestore %r, %c, %t
    // -->
    // call
    // ldtilecfg
    // %t = tileload %r, %c
    // tilestore %r, %c, %t
    if (HasTileOperand(MRI, MI))
      HasUnconfigTile = true;
    // According to the AMX ABI, all the tile registers including the config
    // register are volatile. The caller needs to save/restore the config
    // register.
    if (MI.isCall() && HasUnconfigTile) {
      MachineBasicBlock::iterator I;
      if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
        I = ++LastShapeMI->getIterator();
      else {
        // A call can overwrite registers like rax, so ensure the tile config
        // instruction is sunk closer to the first instruction that uses a
        // tile.
        auto UseIt = MI.getIterator();
        while (UseIt != MBB.end()) {
          if (HasTileOperand(MRI, *UseIt))
            break;
          ++UseIt;
        }
        I = UseIt;
      }
      Config(*I);
      HasUnconfigTile = false;
      continue;
    }
    if (!isTileDef(MRI, MI))
      continue;
    //
    //---------------------------------------------------------------------
    // Don't handle COPY instructions. If the src and dst of a COPY can be in
    // the same config, as in the case below, we just check the shape of t0.
    // def row0
    // def col0
    // ldtilecfg
    // t0 = tilezero(row0, col0)
    // t1 = copy t0
    // ...
    // If the src and dst of the COPY can NOT be in the same config, as in
    // the case below, a reload would be generated before the copy
    // instruction.
    // def row0
    // def col0
    // t0 = tilezero(row0, col0)
    // spill t0
    // ...
    // def row1
    // def col1
    // ldtilecfg
    // t1 = tilezero(row1, col1)
    // reload t0
    // t1 = copy t0
    //---------------------------------------------------------------------
    //
    // If MI dominates the last shape def instruction, we need to insert
    // ldtilecfg after LastShapeMI now. The config doesn't include the
    // current MI.
    // def row0
    // def col0
    // tilezero(row0, col0) <- MI
    // def row1
    // def col1
    // ldtilecfg <- insert
    // tilezero(row1, col1)
    if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
      Config(*(++LastShapeMI->getIterator()));
    MachineOperand *RowMO = &MI.getOperand(1);
    MachineOperand *ColMO = &MI.getOperand(2);
    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
    // If the shape is defined in the current MBB, check the domination.
    // FIXME how about loop?
    if (RowMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = RowMI;
      else if (dominates(MBB, LastShapeMI, RowMI))
        LastShapeMI = RowMI;
    }
    if (ColMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = ColMI;
      else if (dominates(MBB, LastShapeMI, ColMI))
        LastShapeMI = ColMI;
    }

    // If a user of the tile lives out of the current tile config, spill the
    // tile and reload it before the user.
    Register TileReg = MI.getOperand(0).getReg();
    if (mayLiveOut(TileReg, LastTileCfg))
      spill(++MI.getIterator(), TileReg, false);
    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
      if (UseMI.getParent() == &MBB) {
        // A use should not cross ldtilecfg.
        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
          continue;
        // Reload before UseMI.
        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      } else {
        // Don't reload for a phi instruction; we handle phi reloads
        // separately.
        // TODO: merge the reload for the same user MBB.
        if (!UseMI.isPHI())
          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      }
    }
  }

  // Configure tile registers at the head of the MBB
  if (HasUnconfigTile) {
    MachineInstr *Before;
    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
      Before = &*MBB.getFirstNonPHI();
    else
      Before = &*(++LastShapeMI->getIterator());

    Config(*Before);
  }

  return Change;
}

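// After configBasicBlock, a block that defines tiles looks roughly like
// (sketch):
//
//   <PHIs>
//   <shape defs>
//   PLDTILECFGV %stack.cfg        <- inserted config
//   <tile defs and uses>
//   <tile spills for live-out values>
//
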
bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  // Early exit in the common case of non-AMX code.
  if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
    return false;

  MF = &MFunc;
  MRI = &MFunc.getRegInfo();
  ST = &MFunc.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  MFI = &MFunc.getFrameInfo();
  TRI = ST->getRegisterInfo();
  CfgSS = -1;

  unsigned NumVirtRegs = MRI->getNumVirtRegs();

  StackSlotForVirtReg.resize(NumVirtRegs);
  MayLiveAcrossBlocks.clear();
  // We will create registers during config. The *3 factor is to make sure
  // the virtual register number doesn't exceed the size of the bit vector.
  MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
  bool Change = false;
  assert(MRI->isSSA());

  // Canonicalize the phi nodes first.
  for (MachineBasicBlock &MBB : MFunc)
    canonicalizePHIs(MBB);

  // Loop over all of the basic blocks in reverse post order and insert
  // ldtilecfg for tile registers. The reverse post order is to facilitate
  // PHI node conversion.
  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
  for (MachineBasicBlock *MBB : RPOT) {
    convertPHIs(*MBB);
    Change |= configBasicBlock(*MBB);
  }

  if (Change)
    InitializeTileConfigStackSpace();

  StackSlotForVirtReg.clear();
  return Change;
}

FunctionPass *llvm::createX86FastPreTileConfigPass() {
  return new X86FastPreTileConfig();
}
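
// Per the pass description, the created pass preconfigures the tile
// registers and runs before fast register allocation. A hypothetical use
// from the X86 target's pass setup would look like:
//
//   addPass(createX86FastPreTileConfigPass());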