LLVM 23.0.0git
AMDGPURegBankLegalizeRules.cpp
Go to the documentation of this file.
1//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// Definitions of RegBankLegalize Rules for all opcodes.
10/// Implementation of container for all the Rules and search.
11/// Fast search for most common case when Rule.Predicate checks LLT and
12/// uniformity of register in operand 0.
13//
14//===----------------------------------------------------------------------===//
15
17#include "AMDGPUInstrInfo.h"
18#include "GCNSubtarget.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
23
24#define DEBUG_TYPE "amdgpu-regbanklegalize"
25
26using namespace llvm;
27using namespace AMDGPU;
28
29bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
30 return Ty.isPointer() && Ty.getSizeInBits() == Width;
31}
32
34 std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
35 std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
37 : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
39
41 std::initializer_list<UniformityLLTOpPredicateID> OpList,
42 std::function<bool(const MachineInstr &)> TestFunc)
44
46 const MachineUniformityInfo &MUI,
47 const MachineRegisterInfo &MRI) {
48 switch (UniID) {
49 case S1:
50 return MRI.getType(Reg) == LLT::scalar(1);
51 case S16:
52 return MRI.getType(Reg) == LLT::scalar(16);
53 case S32:
54 return MRI.getType(Reg) == LLT::scalar(32);
55 case S64:
56 return MRI.getType(Reg) == LLT::scalar(64);
57 case S128:
58 return MRI.getType(Reg) == LLT::scalar(128);
59 case P0:
60 return MRI.getType(Reg) == LLT::pointer(0, 64);
61 case P1:
62 return MRI.getType(Reg) == LLT::pointer(1, 64);
63 case P2:
64 return MRI.getType(Reg) == LLT::pointer(2, 32);
65 case P3:
66 return MRI.getType(Reg) == LLT::pointer(3, 32);
67 case P4:
68 return MRI.getType(Reg) == LLT::pointer(4, 64);
69 case P5:
70 return MRI.getType(Reg) == LLT::pointer(5, 32);
71 case P8:
72 return MRI.getType(Reg) == LLT::pointer(8, 128);
73 case Ptr32:
74 return isAnyPtr(MRI.getType(Reg), 32);
75 case Ptr64:
76 return isAnyPtr(MRI.getType(Reg), 64);
77 case Ptr128:
78 return isAnyPtr(MRI.getType(Reg), 128);
79 case V2S16:
80 return MRI.getType(Reg) == LLT::fixed_vector(2, 16);
81 case V2S32:
82 return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
83 case V3S32:
84 return MRI.getType(Reg) == LLT::fixed_vector(3, 32);
85 case V4S32:
86 return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
87 case B32:
88 return MRI.getType(Reg).getSizeInBits() == 32;
89 case B64:
90 return MRI.getType(Reg).getSizeInBits() == 64;
91 case B96:
92 return MRI.getType(Reg).getSizeInBits() == 96;
93 case B128:
94 return MRI.getType(Reg).getSizeInBits() == 128;
95 case B160:
96 return MRI.getType(Reg).getSizeInBits() == 160;
97 case B256:
98 return MRI.getType(Reg).getSizeInBits() == 256;
99 case B512:
100 return MRI.getType(Reg).getSizeInBits() == 512;
101 case DivAnyTy:
102 return MUI.isDivergentAtDef(Reg);
103 case UniS1:
104 return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniformAtDef(Reg);
105 case UniS16:
106 return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniformAtDef(Reg);
107 case UniS32:
108 return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniformAtDef(Reg);
109 case UniS64:
110 return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniformAtDef(Reg);
111 case UniS128:
112 return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniformAtDef(Reg);
113 case UniP0:
114 return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniformAtDef(Reg);
115 case UniP1:
116 return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniformAtDef(Reg);
117 case UniP2:
118 return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isUniformAtDef(Reg);
119 case UniP3:
120 return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniformAtDef(Reg);
121 case UniP4:
122 return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniformAtDef(Reg);
123 case UniP5:
124 return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniformAtDef(Reg);
125 case UniP8:
126 return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniformAtDef(Reg);
127 case UniPtr32:
128 return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniformAtDef(Reg);
129 case UniPtr64:
130 return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniformAtDef(Reg);
131 case UniPtr128:
132 return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniformAtDef(Reg);
133 case UniV2S16:
134 return MRI.getType(Reg) == LLT::fixed_vector(2, 16) &&
135 MUI.isUniformAtDef(Reg);
136 case UniV2S32:
137 return MRI.getType(Reg) == LLT::fixed_vector(2, 32) &&
138 MUI.isUniformAtDef(Reg);
139 case UniB32:
140 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniformAtDef(Reg);
141 case UniB64:
142 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniformAtDef(Reg);
143 case UniB96:
144 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniformAtDef(Reg);
145 case UniB128:
146 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniformAtDef(Reg);
147 case UniB160:
148 return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isUniformAtDef(Reg);
149 case UniB256:
150 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniformAtDef(Reg);
151 case UniB512:
152 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniformAtDef(Reg);
153 case UniBRC: {
154 if (MUI.isDivergentAtDef(Reg))
155 return false;
156 // Check if there is SGPR register class of same size as the LLT.
157 const SIRegisterInfo *TRI =
158 static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
159 // There is no 16 bit SGPR register class. Extra size check is required
160 // since getSGPRClassForBitWidth returns SReg_32RegClass for Size 16.
161 unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
162 return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize);
163 }
164 case DivS1:
165 return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergentAtDef(Reg);
166 case DivS16:
167 return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergentAtDef(Reg);
168 case DivS32:
169 return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergentAtDef(Reg);
170 case DivS64:
171 return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergentAtDef(Reg);
172 case DivS128:
173 return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergentAtDef(Reg);
174 case DivP0:
175 return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergentAtDef(Reg);
176 case DivP1:
177 return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergentAtDef(Reg);
178 case DivP2:
179 return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isDivergentAtDef(Reg);
180 case DivP3:
181 return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergentAtDef(Reg);
182 case DivP4:
183 return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergentAtDef(Reg);
184 case DivP5:
185 return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergentAtDef(Reg);
186 case DivPtr32:
187 return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergentAtDef(Reg);
188 case DivPtr64:
189 return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergentAtDef(Reg);
190 case DivPtr128:
191 return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergentAtDef(Reg);
192 case DivV2S16:
193 return MRI.getType(Reg) == LLT::fixed_vector(2, 16) &&
195 case DivV2S32:
196 return MRI.getType(Reg) == LLT::fixed_vector(2, 32) &&
198 case DivV3S32:
199 return MRI.getType(Reg) == LLT::fixed_vector(3, 32) &&
201 case DivV4S16:
202 return MRI.getType(Reg) == LLT::fixed_vector(4, 16) &&
204 case DivV6S32:
205 return MRI.getType(Reg) == LLT::fixed_vector(6, 32) &&
207 case DivB32:
208 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergentAtDef(Reg);
209 case DivB64:
210 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergentAtDef(Reg);
211 case DivB96:
212 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergentAtDef(Reg);
213 case DivB128:
214 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergentAtDef(Reg);
215 case DivB160:
216 return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isDivergentAtDef(Reg);
217 case DivB256:
218 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergentAtDef(Reg);
219 case DivB512:
220 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergentAtDef(Reg);
221 case DivBRC: {
222 if (MUI.isUniformAtDef(Reg))
223 return false;
224 // Check if there is VGPR register class of same size as the LLT.
225 const SIRegisterInfo *TRI =
226 static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
227 return TRI->getSGPRClassForBitWidth(MRI.getType(Reg).getSizeInBits());
228 }
229 case BRC: {
230 // Check if there is SGPR and VGPR register class of same size as the LLT.
231 const SIRegisterInfo *TRI =
232 static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
233 unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
234 return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize) &&
235 TRI->getVGPRClassForBitWidth(LLTSize);
236 }
237 case _:
238 return true;
239 default:
240 llvm_unreachable("missing matchUniformityAndLLT");
241 }
242}
243
245 const MachineUniformityInfo &MUI,
246 const MachineRegisterInfo &MRI) const {
247 // Check LLT signature.
248 for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
249 const MachineOperand &MO = MI.getOperand(i);
250 if (OpUniformityAndTypes[i] == _) {
251 assert((!MI.getOperand(i).isReg() ||
252 !MI.getOperand(i).getReg().isVirtual()) &&
253 "_ is for non-register and physical register operands only");
254 continue;
255 }
256
257 // Remaining IDs check registers.
258 if (!MO.isReg())
259 return false;
260
261 if (!matchUniformityAndLLT(MO.getReg(), OpUniformityAndTypes[i], MUI, MRI))
262 return false;
263 }
264
265 // More complex check.
266 if (TestFunc)
267 return TestFunc(MI);
268
269 return true;
270}
271
273
275 : FastTypes(FastTypes) {}
276
278 if (Ty == LLT::scalar(16))
279 return S16;
280 if (Ty == LLT::scalar(32))
281 return S32;
282 if (Ty == LLT::scalar(64))
283 return S64;
284 if (Ty == LLT::fixed_vector(2, 16))
285 return V2S16;
286 if (Ty == LLT::fixed_vector(2, 32))
287 return V2S32;
288 if (Ty == LLT::fixed_vector(3, 32))
289 return V3S32;
290 if (Ty == LLT::fixed_vector(4, 32))
291 return V4S32;
292 return _;
293}
294
296 if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
297 isAnyPtr(Ty, 32))
298 return B32;
299 if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
300 Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
301 return B64;
302 if (Ty == LLT::fixed_vector(3, 32))
303 return B96;
304 if (Ty == LLT::fixed_vector(4, 32) || Ty == LLT::fixed_vector(2, 64) ||
305 Ty == LLT::fixed_vector(8, 16) || isAnyPtr(Ty, 128))
306 return B128;
307 return _;
308}
309
310const RegBankLLTMapping *
312 const MachineRegisterInfo &MRI,
313 const MachineUniformityInfo &MUI) const {
314 // Search in "Fast Rules".
315 // Note: if fast rules are enabled, RegBankLLTMapping must be added in each
316 // slot that could "match fast Predicate". If not, InvalidMapping is
317 // returned which results in failure, does not search "Slow Rules".
318 if (FastTypes != NoFastRules) {
319 Register Reg = MI.getOperand(0).getReg();
320 int Slot;
321 if (FastTypes == StandardB)
322 Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
323 else
324 Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));
325
326 if (Slot != -1)
327 return MUI.isUniformAtDef(Reg) ? &Uni[Slot] : &Div[Slot];
328 }
329
330 // Slow search for more complex rules.
331 for (const RegBankLegalizeRule &Rule : Rules) {
332 if (Rule.Predicate.match(MI, MUI, MRI))
333 return &Rule.OperandMapping;
334 }
335
336 return nullptr;
337}
338
340 Rules.push_back(Rule);
341}
342
344 RegBankLLTMapping RuleApplyIDs) {
345 int Slot = getFastPredicateSlot(Ty);
346 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
347 Div[Slot] = std::move(RuleApplyIDs);
348}
349
351 RegBankLLTMapping RuleApplyIDs) {
352 int Slot = getFastPredicateSlot(Ty);
353 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
354 Uni[Slot] = std::move(RuleApplyIDs);
355}
356
357int SetOfRulesForOpcode::getFastPredicateSlot(
359 switch (FastTypes) {
360 case Standard: {
361 switch (Ty) {
362 case S32:
363 return 0;
364 case S16:
365 return 1;
366 case S64:
367 return 2;
368 case V2S16:
369 return 3;
370 default:
371 return -1;
372 }
373 }
374 case StandardB: {
375 switch (Ty) {
376 case B32:
377 return 0;
378 case B64:
379 return 1;
380 case B96:
381 return 2;
382 case B128:
383 return 3;
384 default:
385 return -1;
386 }
387 }
388 case Vector: {
389 switch (Ty) {
390 case S32:
391 return 0;
392 case V2S32:
393 return 1;
394 case V3S32:
395 return 2;
396 case V4S32:
397 return 3;
398 default:
399 return -1;
400 }
401 }
402 default:
403 return -1;
404 }
405}
406
// Begins a rule set for every opcode in OpcList. Returns a RuleSetInitializer
// constructed over the GRulesAlias / GRules containers (the ones
// getRulesForOpc consults for non-intrinsic opcodes) with the requested
// FastTypes layout; callers chain .Uni/.Div/.Any on the returned object.
407RegBankLegalizeRules::RuleSetInitializer
408RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
409 FastRulesTypes FastTypes) {
410 return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
411}
412
// Begins a rule set for every ID in OpcList. Returns a RuleSetInitializer
// constructed over the IRulesAlias / IRules containers (the ones
// getRulesForOpc consults, keyed by intrinsic ID, for G_INTRINSIC* opcodes)
// with the requested FastTypes layout.
413RegBankLegalizeRules::RuleSetInitializer
414RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
415 FastRulesTypes FastTypes) {
416 return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
417}
418
421 unsigned Opc = MI.getOpcode();
422 if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
423 Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
424 Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
425 unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
426 auto IRAIt = IRulesAlias.find(IntrID);
427 if (IRAIt == IRulesAlias.end())
428 return nullptr;
429 return &IRules.at(IRAIt->second);
430 }
431
432 auto GRAIt = GRulesAlias.find(Opc);
433 if (GRAIt == GRulesAlias.end())
434 return nullptr;
435 return &GRules.at(GRAIt->second);
436}
437
438// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
439class Predicate {
440private:
441 struct Elt {
442 // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
443 // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
444 // Sequences of && and || will be represented by jumps, for example:
445 // (A && B && ... X) or (A && B && ... X) || Y
446 // A == true jump to B
447 // A == false jump to end or Y, result is A(false) or Y
448 // (A || B || ... X) or (A || B || ... X) && Y
449 // A == true jump to end or Y, result is A(true) or Y
450 // A == false jump to B
451 // Notice that when negating expression, we simply flip Neg on each Pred
452 // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
453 std::function<bool(const MachineInstr &)> Pred;
454 bool Neg; // Neg of Pred is calculated before jump
455 unsigned TJumpOffset;
456 unsigned FJumpOffset;
457 };
458
459 SmallVector<Elt, 8> Expression;
460
461 Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };
462
463public:
464 Predicate(std::function<bool(const MachineInstr &)> Pred) {
465 Expression.push_back({Pred, false, 1, 1});
466 };
467
468 bool operator()(const MachineInstr &MI) const {
469 unsigned Idx = 0;
470 unsigned ResultIdx = Expression.size();
471 bool Result;
472 do {
473 Result = Expression[Idx].Pred(MI);
474 Result = Expression[Idx].Neg ? !Result : Result;
475 if (Result) {
476 Idx += Expression[Idx].TJumpOffset;
477 } else {
478 Idx += Expression[Idx].FJumpOffset;
479 }
480 } while ((Idx != ResultIdx));
481
482 return Result;
483 };
484
485 Predicate operator!() const {
486 SmallVector<Elt, 8> NegExpression;
487 for (const Elt &ExprElt : Expression) {
488 NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
489 ExprElt.TJumpOffset});
490 }
491 return Predicate(std::move(NegExpression));
492 };
493
494 Predicate operator&&(const Predicate &RHS) const {
495 SmallVector<Elt, 8> AndExpression = Expression;
496
497 unsigned RHSSize = RHS.Expression.size();
498 unsigned ResultIdx = Expression.size();
499 for (unsigned i = 0; i < ResultIdx; ++i) {
500 // LHS results in false, whole expression results in false.
501 if (i + AndExpression[i].FJumpOffset == ResultIdx)
502 AndExpression[i].FJumpOffset += RHSSize;
503 }
504
505 AndExpression.append(RHS.Expression);
506
507 return Predicate(std::move(AndExpression));
508 }
509
510 Predicate operator||(const Predicate &RHS) const {
511 SmallVector<Elt, 8> OrExpression = Expression;
512
513 unsigned RHSSize = RHS.Expression.size();
514 unsigned ResultIdx = Expression.size();
515 for (unsigned i = 0; i < ResultIdx; ++i) {
516 // LHS results in true, whole expression results in true.
517 if (i + OrExpression[i].TJumpOffset == ResultIdx)
518 OrExpression[i].TJumpOffset += RHSSize;
519 }
520
521 OrExpression.append(RHS.Expression);
522
523 return Predicate(std::move(OrExpression));
524 }
525};
526
527// Initialize rules
530 : ST(&_ST), MRI(&_MRI) {
531
532 addRulesForGOpcs({G_ADD, G_SUB}, Standard)
533 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
534 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
535 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
536 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
538 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
539 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
540 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});
541
542 addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
543 .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
544 .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});
545
546 addRulesForGOpcs({G_UADDE, G_USUBE, G_SADDE, G_SSUBE}, Standard)
548 .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});
549
550 addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}, Standard)
551 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
552 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
553 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
554 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
556 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
557
558 bool HasVecMulU64 = ST->hasVMulU64Inst();
559 addRulesForGOpcs({G_MUL}, Standard)
560 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
561 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
562 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
563 .Uni(S64, {{SgprB64}, {SgprB64, SgprB64}})
565 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
566 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
567 .Div(S64, {{VgprB64}, {VgprB64, VgprB64}}, HasVecMulU64)
568 .Div(S64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32Mul}, !HasVecMulU64);
569
570 bool hasMulHi = ST->hasScalarMulHiInsts();
571 addRulesForGOpcs({G_UMULH, G_SMULH}, Standard)
572 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
573 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasMulHi)
574 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasMulHi);
575
576 addRulesForGOpcs({G_AMDGPU_MAD_U64_U32}, Standard)
577 .Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}})
579
580 bool HasScalarSMulU64 = ST->hasScalarSMulU64();
581 addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, Standard)
582 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}, UniMul64}, HasScalarSMulU64)
583 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, DivSMulToMAD});
584
585 addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
587 .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
588 .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
589 .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
590 .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
591 .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
592 .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
593 .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
594
595 addRulesForGOpcs({G_SHL}, Standard)
596 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
597 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
599 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
600 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
601 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
602 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
603 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
604
605 addRulesForGOpcs({G_LSHR}, Standard)
606 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
607 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
609 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
610 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
611 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
612 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
613 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
614
615 addRulesForGOpcs({G_ASHR}, Standard)
616 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
617 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
619 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
620 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
621 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
622 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
623 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
624
625 addRulesForGOpcs({G_FSHR}, Standard)
626 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
627 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
628
629 addRulesForGOpcs({G_BSWAP}, Standard)
630 .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
631 .Div(S16, {{Vgpr16}, {Vgpr16}})
632 .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
633 .Div(S32, {{Vgpr32}, {Vgpr32}})
634 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
635 .Div(V2S16, {{VgprV2S16}, {VgprV2S16}});
636
637 addRulesForGOpcs({G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
638 G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
639 G_AMDGPU_RCP_IFLAG},
640 Standard)
641 .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
642 .Div(S32, {{Vgpr32}, {Vgpr32}});
643
644 addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});
645
646 addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
647 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
648 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
649 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
650 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});
651
652 addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
653 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
654 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
655 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
656 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
658 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
659
660 addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
661 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
662 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
663 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
664 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
666 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
667
668 addRulesForGOpcs({G_IMPLICIT_DEF})
669 .Any({{UniS1}, {{Sgpr32Trunc}, {}}})
670 .Any({{UniS16}, {{Sgpr16}, {}}})
671 .Any({{UniBRC}, {{SgprBRC}, {}}});
672
673 addRulesForGOpcs({G_CONSTANT}, Standard)
674 .Any({{UniS1, _}, {{Sgpr32Trunc}, {}, UniCstExt}})
675 .Uni(S16, {{Sgpr16}, {}})
676 .Uni(S32, {{Sgpr32}, {}})
677 .Uni(S64, {{Sgpr64}, {}})
678 .Any({{UniPtr32, _}, {{SgprPtr32}, {}}})
679 .Any({{UniPtr64, _}, {{SgprPtr64}, {}}});
680
681 addRulesForGOpcs({G_FCONSTANT}, Standard)
682 .Uni(S16, {{Sgpr16}, {}})
683 .Uni(S32, {{Sgpr32}, {}})
684 .Uni(S64, {{Sgpr64}, {}});
685
686 addRulesForGOpcs({G_FREEZE})
687 .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt}}})
688 .Any({{DivS1}, {{Vcc}, {Vcc}}})
689 .Any({{UniS16}, {{Sgpr16}, {Sgpr16}}})
690 .Any({{UniBRC}, {{SgprBRC}, {SgprBRC}}})
691 .Any({{DivBRC}, {{VgprBRC}, {VgprBRC}}});
692
693 addRulesForGOpcs({G_BITCAST})
694 .Any({{UniBRC}, {{SgprBRC}, {SgprBRC}}})
695 .Any({{DivBRC}, {{VgprBRC}, {VgprBRC}}});
696
697 addRulesForGOpcs({G_UNMERGE_VALUES})
698 .Any({{UniS16}, {{}, {}, UnmergeToShiftTrunc}})
699 .Any({{UniBRC}, {{}, {}, VerifyAllSgpr}})
700 .Any({{DivBRC}, {{}, {}, ApplyAllVgpr}});
701
702 addRulesForGOpcs({G_BUILD_VECTOR, G_MERGE_VALUES})
703 .Any({{UniBRC, S16}, {{}, {}, VerifyAllSgpr}})
704 .Any({{UniBRC, BRC}, {{}, {}, VerifyAllSgpr}})
705 .Any({{DivBRC, S16}, {{}, {}, ApplyAllVgpr}})
706 .Any({{DivBRC, BRC}, {{}, {}, ApplyAllVgpr}});
707
708 addRulesForGOpcs({G_CONCAT_VECTORS})
709 .Any({{UniBRC, BRC}, {{}, {}, VerifyAllSgpr}})
710 .Any({{DivBRC, BRC}, {{}, {}, ApplyAllVgpr}});
711
712 addRulesForGOpcs({G_PHI})
713 .Any({{UniS1}, {{}, {}, AextToS32InIncomingBlockGPHI}})
714 .Any({{UniS16}, {{}, {}, VerifyAllSgprGPHI}})
715 .Any({{UniBRC}, {{}, {}, VerifyAllSgprGPHI}})
716 .Any({{DivBRC}, {{}, {}, VerifyAllSgprOrVgprGPHI}});
717
718 addRulesForGOpcs({G_EXTRACT_VECTOR_ELT})
719 .Any({{UniB32, UniBRC, UniS32}, {{SgprB32}, {SgprBRC, Sgpr32}}})
720 .Any({{DivB32, DivBRC, UniS32}, {{VgprB32}, {VgprBRC, Sgpr32}}})
721 .Any({{DivB32, BRC, DivS32},
723 .Any({{UniB64, UniBRC, UniS32}, {{SgprB64}, {SgprBRC, Sgpr32}}})
724 .Any({{DivB64, DivBRC, UniS32},
726 .Any({{DivB64, BRC, DivS32},
728
729 addRulesForGOpcs({G_INSERT_VECTOR_ELT})
731 {{SgprBRC}, {SgprBRC, SgprB32, Sgpr32}}})
732 .Any(
733 {{DivBRC, BRC, B32, UniS32}, {{VgprBRC}, {VgprBRC, VgprB32, Sgpr32}}})
734 .Any({{DivBRC, BRC, B32, DivS32},
738 .Any({{DivBRC, BRC, B64, UniS32},
740 .Any({{DivBRC, BRC, B64, DivS32},
742
743 // INTERSECT_RAY {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
744 // INTERSECT_RAY {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
745 addRulesForGOpcs({G_AMDGPU_BVH_INTERSECT_RAY, G_AMDGPU_BVH_DUAL_INTERSECT_RAY,
746 G_AMDGPU_BVH8_INTERSECT_RAY})
747 .Any({{}, {{}, {}, ApplyBVH_INTERSECT_RAY}});
748
749 // LOAD {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
750 // LOAD {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
751 // LOAD_NORET {}, {{}, {Imm, VgprSrc, ..., Sgpr_WF_RsrcIdx}}
752 // STORE {}, {{}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
753 addRulesForGOpcs({G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
754 G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
755 G_AMDGPU_INTRIN_IMAGE_STORE,
756 G_AMDGPU_INTRIN_IMAGE_STORE_D16})
757 .Any({{}, {{}, {}, ApplyINTRIN_IMAGE}});
758
759 Predicate isSignedICmp([](const MachineInstr &MI) -> bool {
760 auto Pred =
761 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
762 return CmpInst::isSigned(Pred);
763 });
764
765 Predicate isEqualityICmp([](const MachineInstr &MI) -> bool {
766 auto Pred =
767 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
768 return ICmpInst::isEquality(Pred);
769 });
770
771 bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
772 // clang-format off
773 addRulesForGOpcs({G_ICMP})
774 .Any({{{UniS1, _, S16}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
775 .Any({{{UniS1, _, S16}, !isEqualityICmp && isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32SExt, Sgpr32SExt}}})
776 .Any({{{UniS1, _, S16}, !isEqualityICmp && !isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
777 .Any({{{DivS1, _, S16}}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
778 .Any({{{UniS1, _, S32}}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
779 .Any({{{DivS1, _, S32}}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
780 .Any({{{UniS1, _, S64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}}, HasScalarCompareEq64)
781 .Any({{{UniS1, _, S64}, isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}}, !HasScalarCompareEq64)
782 .Any({{{UniS1, _, S64}, !isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
783 .Any({{{DivS1, _, S64}}, {{Vcc}, {None, Vgpr64, Vgpr64}}})
784 .Any({{{UniS1, _, Ptr32}}, {{Sgpr32Trunc}, {None, SgprPtr32, SgprPtr32}}})
785 .Any({{{DivS1, _, Ptr32}}, {{Vcc}, {None, VgprPtr32, VgprPtr32}}})
786 .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, SgprPtr64, SgprPtr64}}}, HasScalarCompareEq64)
787 .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}}, !HasScalarCompareEq64)
788 .Any({{{UniS1, _, Ptr64}, !isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}})
789 .Any({{{DivS1, _, Ptr64}}, {{Vcc}, {None, VgprPtr64, VgprPtr64}}});
790 // clang-format on
791
792 addRulesForGOpcs({G_BRCOND})
793 .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
794 .Any({{DivS1}, {{}, {Vcc}}});
795
796 addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});
797
798 addRulesForGOpcs({G_SELECT}, StandardB)
799 .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
801 .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
805
806 addRulesForGOpcs({G_ANYEXT})
807 .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
808 .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
809 .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
810 .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
811 .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
812 .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
813 .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
814 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
815 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
816 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
817
818 bool Has16bitCmp = ST->has16BitInsts();
819
820 // In global-isel G_TRUNC in-reg is treated as no-op, inst selected into COPY.
821 // It is up to user to deal with truncated bits.
822 // S1, S16, S32 and S64 results are handled with specific rules. Remaining
823 // (result, source) pairs with valid register classes are covered by the
824 // generic UniBRC/DivBRC wildcard rules.
825 addRulesForGOpcs({G_TRUNC})
826 .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
827 .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
828 .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
829 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
830 .Any({{UniBRC, UniBRC}, {{SgprBRC}, {SgprBRC}}})
831 .Any({{DivBRC, DivBRC}, {{VgprBRC}, {VgprBRC}}})
832 .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
833 .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
834 // This is non-trivial. VgprToVccCopy is done using compare instruction.
835 .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}}, Has16bitCmp)
837 !Has16bitCmp)
838 .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
839 .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});
840
841 addRulesForGOpcs({G_ZEXT})
845 .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
846 .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
847 .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
848 .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
849 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
850 // not extending S16 to S32 is questionable.
851 .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
852 .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
853 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
854 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
855
856 addRulesForGOpcs({G_SEXT})
860 .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
861 .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
862 .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
863 .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
864 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
865 // not extending S16 to S32 is questionable.
866 .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
867 .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
868 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
869 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
870
871 addRulesForGOpcs({G_SEXT_INREG})
872 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
873 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
874 .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
876
877 addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
878 .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
879 .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
880 .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
881 .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});
882
883 addRulesForGOpcs({G_ASSERT_ALIGN}, Standard)
884 .Uni(S32, {{Sgpr32}, {Sgpr32}})
885 .Div(S32, {{Vgpr32}, {Vgpr32}})
886 .Uni(S64, {{Sgpr64}, {Sgpr64}})
887 .Div(S64, {{Vgpr64}, {Vgpr64}})
888 .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32}}})
889 .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32}}})
890 .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64}}})
891 .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64}}});
892
893 // Atomic read-modify-write operations: result and value are always VGPR,
894 // pointer varies by address space.
895 addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
896 G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
897 G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
898 G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
899 G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX})
900 .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
901 .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
902 .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
903 .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
904 .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
905 .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}});
906
907 addRulesForGOpcs({G_ATOMICRMW_USUB_SAT, G_ATOMICRMW_USUB_COND})
908 .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0, Vgpr32}}})
909 .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1, Vgpr32}}})
910 .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3, Vgpr32}}});
911
912 bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
913 bool HasAtomicBufferGlobalPkAddF16Insts =
914 ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
915 ST->hasAtomicBufferGlobalPkAddF16Insts();
916 bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
917 addRulesForGOpcs({G_ATOMICRMW_FADD})
918 .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
919 .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
920 .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
921 .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
922 .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
923 .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}})
924 .Any({{DivV2S16, P0, V2S16}, {{VgprV2S16}, {VgprP0, VgprV2S16}}},
925 HasAtomicFlatPkAdd16Insts)
926 .Any({{DivV2S16, P1, V2S16}, {{VgprV2S16}, {VgprP1, VgprV2S16}}},
927 HasAtomicBufferGlobalPkAddF16Insts)
928 .Any({{DivV2S16, P3, V2S16}, {{VgprV2S16}, {VgprP3, VgprV2S16}}},
929 HasAtomicDsPkAdd16Insts);
930
931 addRulesForGOpcs({G_ATOMIC_CMPXCHG})
932 .Any({{DivS32, P2}, {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
933 .Any({{DivS64, P2}, {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
934 .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3, Vgpr32, Vgpr32}}})
935 .Any({{DivS64, P3}, {{Vgpr64}, {VgprP3, Vgpr64, Vgpr64}}});
936
937 addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
938 .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0, VgprV2S32}}})
939 .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1, VgprV2S32}}})
940 .Any({{DivS64, P0}, {{Vgpr64}, {VgprP0, VgprV2S64}}})
941 .Any({{DivS64, P1}, {{Vgpr64}, {VgprP1, VgprV2S64}}});
942
943 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_CMPSWAP}, Standard)
944 .Div(S32, {{Vgpr32},
946 .Div(S64, {{Vgpr64},
948
949 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_ADD, G_AMDGPU_BUFFER_ATOMIC_AND,
950 G_AMDGPU_BUFFER_ATOMIC_DEC, G_AMDGPU_BUFFER_ATOMIC_FMAX,
951 G_AMDGPU_BUFFER_ATOMIC_FMIN, G_AMDGPU_BUFFER_ATOMIC_INC,
952 G_AMDGPU_BUFFER_ATOMIC_OR, G_AMDGPU_BUFFER_ATOMIC_SMAX,
953 G_AMDGPU_BUFFER_ATOMIC_SMIN, G_AMDGPU_BUFFER_ATOMIC_SUB,
954 G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
955 G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_XOR},
956 Standard)
959
960 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
961 bool hasSMRDSmall = ST->hasScalarSubwordLoads();
962 bool usesTrue16 = ST->useRealTrue16Insts();
963
964 Predicate isAlign16([](const MachineInstr &MI) -> bool {
965 return (*MI.memoperands_begin())->getAlign() >= Align(16);
966 });
967
968 Predicate isAlign4([](const MachineInstr &MI) -> bool {
969 return (*MI.memoperands_begin())->getAlign() >= Align(4);
970 });
971
972 Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
973 return (*MI.memoperands_begin())->isAtomic();
974 });
975
976 Predicate isUniMMO([](const MachineInstr &MI) -> bool {
977 return AMDGPU::isUniformMMO(*MI.memoperands_begin());
978 });
979
980 Predicate isConst([](const MachineInstr &MI) -> bool {
981 // Address space in MMO can be different than address space on pointer.
982 const MachineMemOperand *MMO = *MI.memoperands_begin();
983 const unsigned AS = MMO->getAddrSpace();
984 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
986 });
987
988 Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
989 return (*MI.memoperands_begin())->isVolatile();
990 });
991
992 Predicate isInvMMO([](const MachineInstr &MI) -> bool {
993 return (*MI.memoperands_begin())->isInvariant();
994 });
995
996 Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
997 return (*MI.memoperands_begin())->getFlags() & MONoClobber;
998 });
999
1000 Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
1001 const MachineMemOperand *MMO = *MI.memoperands_begin();
1002 return MMO->getAlign() >= Align(MMO->getSize().getValue());
1003 });
1004
1005 Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
1006 const MachineMemOperand *MMO = *MI.memoperands_begin();
1007 const unsigned MemSize = 8 * MMO->getSize().getValue();
1008 return MemSize == 16 || MemSize == 8;
1009 });
1010
1011 Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
1012 const MachineMemOperand *MMO = *MI.memoperands_begin();
1013 return 8 * MMO->getSize().getValue() == 32;
1014 });
1015
1016 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
1017 (isConst || isInvMMO || isNoClobberMMO);
1018
1019 // clang-format off
1020 // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
1021 addRulesForGOpcs({G_LOAD})
1022 // flat, addrspace(0), never uniform - flat_load
1023 .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
1024 .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1025 .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
1026 .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
1027 .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})
1028
1029 // global, addrspace(1)
1030 // divergent - global_load
1031 .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
1032 .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) //32-bit load, 8-bit and 16-bit any-extending load
1033 .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
1034 .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
1035 .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
1036 .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
1037 .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})
1038
1039 // uniform - s_load
1040 .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
1041 .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
1042 .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1043 // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
1044 .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
1045 .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) //32-bit load
1046 .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
1047 .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
1048 .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
1049 .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
1050 .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
1051 .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
1052 .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
1053
1054 // Uniform via global or buffer load, for example volatile or non-aligned
1055 // uniform load. Not using standard {{UniInVgprTy}, {VgprP1}} since it is
1056 // selected as global_load, use SgprP1 for pointer instead to match
1057 // patterns without flat-for-global, default for GFX7 and older.
1058 // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
1059 // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
1060 .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
1061 .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
1062 .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1063 .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1064 .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
1065 .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
1066 .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
1067 .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
1068 .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})
1069
1070 // local, addrspace(3) - ds_load
1071 .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
1072 .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1073 .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
1074 .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
1075 .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})
1076
1077 .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
1078 .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1079 .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
1080 .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
1081 .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})
1082
1083 // constant, addrspace(4)
1084 // divergent - global_load
1085 .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
1086 .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) //32-bit load, 8-bit and 16-bit any-extending load
1087 .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
1088 .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
1089 .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
1090 .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
1091 .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})
1092
1093 // uniform - s_load
1094 .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
1095 .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
1096 .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1097 .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
1098 .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) //32-bit load
1099 .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
1100 .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
1101 .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
1102 .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
1103 .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
1104 .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
1105 .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})
1106
1107 // uniform in vgpr - global_load or buffer_load
1108 .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
1109 .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
1110 .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1111 .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1112 .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
1113 .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
1114 .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
1115 .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
1116 .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})
1117
1118 // private, addrspace(5), never uniform - scratch_load
1119 .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
1120 .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1121 .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
1122 .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
1123 .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})
1124
1125 .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});
1126
1127
1128 addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zeroextending loads
1129 .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})
1130 .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
1131
1132 .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
1133 .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
1134 .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
1135 .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
1136 .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
1137 .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)
1138
1139 .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
1140 .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
1141 .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})
1142
1143 .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
1144 .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
1145 .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
1146 .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
1147 .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
1148 .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)
1149
1150 .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}})
1151 .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16);
1152
1153 addRulesForGOpcs({G_STORE})
1154 // addrspace(0)
1155 .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
1156 .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
1157 .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
1158 .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
1159 .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})
1160
1161 // addrspace(1), there are no stores to addrspace(4)
1162 // For targets:
1163 // - with "+flat-for-global" - global_store
1164 // - without(-flat-for-global) - buffer_store addr64
1165 .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
1166 .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
1167 .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
1168 .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
1169 .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})
1170
1171 // For UniP1, use sgpr ptr to match flat-for-global patterns. Targets:
1172 // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
1173 // - without(-flat-for-global) - need sgpr ptr to select buffer_store
1174 .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
1175 .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
1176 .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
1177 .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
1178 .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})
1179
1180 // addrspace(3) and addrspace(5)
1181 .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
1182 .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
1183 .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
1184 .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
1185 .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
1186
1187 // clang-format on
1188
1189 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
1190 G_AMDGPU_TBUFFER_LOAD_FORMAT},
1191 StandardB)
1200
1201 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
1202 G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
1203 StandardB)
1206
1207 addRulesForGOpcs(
1208 {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
1209 StandardB)
1212
1213 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
1214 StandardB)
1222 .Any({{UniB160},
1224
1225 addRulesForGOpcs(
1226 {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
1227 StandardB)
1234
1235 addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
1236 G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
1237 G_AMDGPU_BUFFER_STORE_FORMAT_D16,
1238 G_AMDGPU_TBUFFER_STORE_FORMAT,
1239 G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
1240 .Any({{B32}, {{}, {VgprB32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1241 .Any({{B64}, {{}, {VgprB64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1242 .Any({{B96}, {{}, {VgprB96, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1243 .Any({{B128}, {{}, {VgprB128, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
1244
1245 // Buffer atomics: resource descriptor + scalar offset are SGPR, data and
1246 // address components are VGPR.
1247 //
1248 // Operand order (SIInstructions.td BufferAtomicGenericInstruction):
1249 // dst = op vdata, rsrc, vindex, voffset, soffset, offset_imm, cachepolicy,
1250 // idxen_imm
1251 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
1252 .Any({{S32, S32, V4S32, S32, S32, S32},
1254 .Any({{S64, S64, V4S32, S32, S32, S32},
1256 .Any({{V2S16, V2S16, V4S32, S32, S32, S32},
1257 {{VgprV2S16},
1259
1260 addRulesForGOpcs({G_PTR_ADD})
1261 .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
1262 .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
1263 .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
1264 .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});
1265
1266 addRulesForGOpcs({G_INTTOPTR})
1267 .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
1268 .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
1269 .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
1270 .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
1271 .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
1272 .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});
1273
1274 addRulesForGOpcs({G_PTRTOINT})
1275 .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
1276 .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
1277 .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
1278 .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
1279 .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
1280 .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});
1281
1282 // FIXME: Update llvm/test/CodeGen/AMDGPU/ptrmask.ll to use GlobalISel.
1283 // Currently crashes on P8 (buffer resource) tests due to legalizer issue.
1284 addRulesForGOpcs({G_PTRMASK})
1285 .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
1286 .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
1287 .Any({{UniP3}, {{SgprP3}, {SgprP3, Sgpr32}}})
1288 .Any({{DivP3}, {{VgprP3}, {VgprP3, Vgpr32}}});
1289
1290 addRulesForGOpcs({G_ABS}, Standard)
1291 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}})
1292 .Div(S16, {{Vgpr16}, {Vgpr16}, AbsToNegMax})
1293 .Uni(S32, {{Sgpr32}, {Sgpr32}})
1294 .Div(S32, {{Vgpr32}, {Vgpr32}, AbsToNegMax})
1295 .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, AbsToS32})
1296 .Div(V2S16, {{VgprV2S16}, {VgprV2S16}, AbsToNegMax});
1297
1298 addRulesForGOpcs({G_BITREVERSE}, Standard)
1299 .Uni(S32, {{Sgpr32}, {Sgpr32}})
1300 .Div(S32, {{Vgpr32}, {Vgpr32}})
1301 .Uni(S64, {{Sgpr64}, {Sgpr64}})
1302 .Div(S64, {{Vgpr64}, {Vgpr64}});
1303
1304 addRulesForGOpcs({G_AMDGPU_FFBH_U32, G_AMDGPU_FFBL_B32, G_CTLZ_ZERO_POISON,
1305 G_CTTZ_ZERO_POISON})
1306 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
1307 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
1308 .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
1310
1311 addRulesForGOpcs({G_CTPOP})
1312 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
1313 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
1314 .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
1315 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}, CtPop64To32}});
1316
1317 addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});
1318
1319 addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
1320 .Uni(S64, {{Sgpr64}, {}});
1321
1322 addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});
1323
1324 addRulesForGOpcs({G_GLOBAL_VALUE})
1325 .Any({{UniP0}, {{SgprP0}, {}}})
1326 .Any({{UniP1}, {{SgprP1}, {}}})
1327 .Any({{UniP3}, {{SgprP3}, {}}})
1328 .Any({{UniP4}, {{SgprP4}, {}}})
1329 .Any({{UniP8}, {{SgprP8}, {}}});
1330
1331 addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});
1332
1333 addRulesForGOpcs({G_SI_CALL})
1334 .Any({{_, UniP0}, {{None}, {SgprP0}}})
1335 .Any({{_, DivP0}, {{None}, {SgprP0Call_WF}}})
1336 .Any({{_, UniP4}, {{None}, {SgprP4}}})
1337 .Any({{_, DivP4}, {{None}, {SgprP4Call_WF}}});
1338
1339 bool hasSALUFloat = ST->hasSALUFloatInsts();
1340
1341 addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
1342 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
1343 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
1344 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1345 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
1346 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
1347 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1348 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1349 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
1350 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
1352 hasSALUFloat)
1353 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
1354
1355 addRulesForGOpcs({G_FSUB, G_STRICT_FSUB}, Standard)
1356 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1357 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1358 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
1359 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
1360 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
1361 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);
1362
1363 addRulesForGOpcs({G_FMAD}, Standard)
1364 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
1365 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1366 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1367 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1368
1369 addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP}, Standard)
1370 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1371 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1372 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
1373 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1374 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr32}})
1375 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
1376
1377 addRulesForGOpcs({G_FMA, G_STRICT_FMA}, Standard)
1378 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1379 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
1380 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
1381 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
1385 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, hasSALUFloat)
1386 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, !hasSALUFloat)
1387 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, hasSALUFloat)
1388 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, !hasSALUFloat)
1389 .Uni(V2S16,
1391 hasSALUFloat)
1393 !hasSALUFloat);
1394
1395 addRulesForGOpcs({G_AMDGPU_FMED3}, Standard)
1396 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
1397 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1398 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1399 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1400
1401 // TODO: This opcode is generated from the i64->i16 signed clamped pattern in
1402 // the PreLegalizerCombiner. Move the combine to RegBankCombiner to keep more
1403 // instructions on SALU.
1404 addRulesForGOpcs({G_AMDGPU_SMED3}, Standard)
1405 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1406 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1407
1408 // FNEG and FABS are either folded as source modifiers or can be selected as
1409 // bitwise XOR and AND with Mask. XOR and AND are available on SALU but for
1410 // targets without SALU float we still select them as VGPR since there would
1411 // be no real sgpr use.
1412 addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
1413 .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
1414 .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
1415 .Div(S16, {{Vgpr16}, {Vgpr16}})
1416 .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
1417 .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
1418 .Div(S32, {{Vgpr32}, {Vgpr32}})
1419 .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
1420 .Div(S64, {{Vgpr64}, {Vgpr64}})
1421 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
1422 .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
1423 .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
1424 .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
1425 .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});
1426
1427 addRulesForGOpcs({G_FCANONICALIZE}, Standard)
1428 .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
1429 .Div(S32, {{Vgpr32}, {Vgpr32}})
1430 .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
1431 .Div(S16, {{Vgpr16}, {Vgpr16}})
1432 .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
1433 .Div(S64, {{Vgpr64}, {Vgpr64}})
1434 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
1435 .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
1436 .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
1437 .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});
1438
1439 bool hasPST = ST->hasPseudoScalarTrans();
1440 addRulesForGOpcs({G_FSQRT}, Standard)
1441 .Div(S16, {{Vgpr16}, {Vgpr16}})
1442 .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasPST)
1443 .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasPST);
1444
1445 addRulesForGOpcs({G_FPTOUI, G_FPTOSI, G_FPTOUI_SAT, G_FPTOSI_SAT})
1446 .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
1447 .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
1448 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
1449 .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat)
1450 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
1451 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
1452 .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
1453 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
1454 .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
1455 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});
1456
1457 addRulesForGOpcs({G_UITOFP, G_SITOFP})
1458 .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
1459 .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
1460 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
1461 .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat)
1462 .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
1463 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
1464 .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
1465 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
1466 .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
1467 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}});
1468
1469 addRulesForGOpcs({G_AMDGPU_S_BUFFER_PREFETCH})
1471
1472 addRulesForGOpcs({G_FPEXT})
1473 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
1474 .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
1475 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}})
1476 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
1477 .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat);
1478
1479 addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32}, Standard)
1480 .Uni(V2S16, {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
1481 .Div(V2S16, {{VgprV2S16}, {Vgpr32, Vgpr32}});
1482
1483 addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY}, Standard)
1484 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1485 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
1486
1487 bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
1488
1489 addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM}, Standard)
1490 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUMinimumMaximumInsts)
1491 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUMinimumMaximumInsts)
1492 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1493 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUMinimumMaximumInsts)
1494 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUMinimumMaximumInsts)
1495 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1496 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1497 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
1499 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
1500
1501 addRulesForGOpcs({G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM,
1502 G_FMINIMUMNUM, G_FMAXIMUMNUM},
1503 Standard)
1504 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1505 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1506 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1507 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
1509 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
1510 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
1511 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
1512 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
1513 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);
1514
1515 addRulesForGOpcs({G_FPTRUNC})
1516 .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
1517 .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
1518 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
1520 .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
1521 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
1522 .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);
1523
1524 addRulesForGOpcs({G_IS_FPCLASS})
1525 .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
1526 .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
1527 .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
1528 .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
1529 .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
1530 .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
1531
1532 addRulesForGOpcs({G_FCMP}, Standard)
1533 .Any({{UniS1, _, S16}, {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}},
1534 hasSALUFloat)
1535 .Any({{UniS1, _, S16}, {{UniInVcc}, {None, Vgpr16, Vgpr16}}},
1536 !hasSALUFloat)
1537 .Any({{DivS1, _, S16}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
1538 .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}},
1539 hasSALUFloat)
1540 .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}},
1541 !hasSALUFloat)
1542 .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
1543 .Any({{UniS1, _, S64}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
1544 .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});
1545
1546 addRulesForGOpcs({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
1547 G_FEXP2, G_FLOG2},
1548 Standard)
1549 .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
1550 .Div(S16, {{Vgpr16}, {Vgpr16}})
1551 .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
1552 .Div(S32, {{Vgpr32}, {Vgpr32}})
1553 .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
1554 .Div(S64, {{Vgpr64}, {Vgpr64}});
1555
1556 addRulesForGOpcs({G_AMDGPU_GLOBAL_LOAD_MONITOR, G_AMDGPU_FLAT_LOAD_MONITOR},
1557 StandardB)
1558 .Uni(B32, {{UniInVgprB32}, {SgprPtr64}})
1559 .Div(B32, {{VgprB32}, {VgprPtr64}})
1560 .Uni(B64, {{UniInVgprB64}, {SgprPtr64}})
1561 .Div(B64, {{VgprB64}, {VgprPtr64}})
1562 .Uni(B128, {{UniInVgprB128}, {SgprPtr64}})
1563 .Div(B128, {{VgprB128}, {VgprPtr64}});
1564
1565 using namespace Intrinsic;
1566
1567 addRulesForIOpcs({returnaddress}).Any({{UniP0}, {{SgprP0}, {}}});
1568
1569 addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
1570
1571 addRulesForIOpcs({amdgcn_s_getreg}).Any({{}, {{Sgpr32}, {IntrId, Imm}}});
1572
1573 addRulesForIOpcs({amdgcn_s_setreg})
1574 .Any({{_, _, S32}, {{}, {IntrId, Imm, SgprB32_ReadFirstLane}}});
1575
1576 addRulesForIOpcs({amdgcn_s_sendmsg, amdgcn_s_sendmsghalt})
1577 .Any({{}, {{}, {IntrId, Imm, SgprB32_M0}}});
1578
1579 addRulesForIOpcs({amdgcn_s_sendmsg_rtn})
1580 .Any({{S32}, {{Sgpr32}, {}}})
1581 .Any({{S64}, {{Sgpr64}, {}}});
1582
1583 addRulesForIOpcs({amdgcn_s_memrealtime, amdgcn_s_memtime}, Standard)
1584 .Uni(S64, {{Sgpr64}, {IntrId}});
1585
1586 addRulesForIOpcs({amdgcn_groupstaticsize, amdgcn_pops_exiting_wave_id,
1587 amdgcn_reloc_constant, amdgcn_s_get_waveid_in_workgroup},
1588 Standard)
1589 .Uni(S32, {{Sgpr32}, {IntrId}});
1590
1591 // Intrinsics with no register operands.
1592 addRulesForIOpcs({amdgcn_asyncmark,
1593 amdgcn_endpgm,
1594 amdgcn_init_exec,
1595 amdgcn_s_barrier,
1596 amdgcn_s_barrier_leave,
1597 amdgcn_s_barrier_signal,
1598 amdgcn_s_barrier_wait,
1599 amdgcn_s_monitor_sleep,
1600 amdgcn_s_nop,
1601 amdgcn_s_sethalt,
1602 amdgcn_s_setprio,
1603 amdgcn_s_setprio_inc_wg,
1604 amdgcn_s_sleep,
1605 amdgcn_s_ttracedata_imm,
1606 amdgcn_s_wait_asynccnt,
1607 amdgcn_s_wait_bvhcnt,
1608 amdgcn_s_wait_dscnt,
1609 amdgcn_s_wait_event,
1610 amdgcn_s_wait_event_export_ready,
1611 amdgcn_s_wait_expcnt,
1612 amdgcn_s_wait_kmcnt,
1613 amdgcn_s_wait_loadcnt,
1614 amdgcn_s_wait_samplecnt,
1615 amdgcn_s_wait_storecnt,
1616 amdgcn_s_wait_tensorcnt,
1617 amdgcn_s_waitcnt,
1618 amdgcn_unreachable,
1619 amdgcn_wait_asyncmark,
1620 amdgcn_wave_barrier})
1621 .Any({{}, {{}, {}}});
1622
1623 addRulesForIOpcs({amdgcn_init_exec_from_input})
1624 .Any({{}, {{}, {IntrId, Sgpr32, Imm}}});
1625
1626 addRulesForIOpcs({amdgcn_s_ttracedata}).Any({{}, {{}, {IntrId, SgprB32_M0}}});
1627
1628 addRulesForIOpcs({amdgcn_s_sleep_var})
1629 .Any({{}, {{}, {IntrId, SgprB32_ReadFirstLane}}});
1630
1631 addRulesForIOpcs({amdgcn_s_barrier_join, amdgcn_s_wakeup_barrier})
1632 .Any({{}, {{}, {IntrId, SgprB32_M0}}});
1633
1634 addRulesForIOpcs({amdgcn_s_barrier_signal_var, amdgcn_s_barrier_init})
1635 .Any({{}, {{}, {IntrId, SgprB32_M0, SgprB32_M0}}});
1636
1637 addRulesForIOpcs({amdgcn_s_barrier_signal_isfirst})
1638 .Any({{UniS1}, {{Sgpr32Trunc}, {}}});
1639
1640 addRulesForIOpcs(
1641 {amdgcn_s_get_named_barrier_state, amdgcn_s_get_barrier_state}, Standard)
1642 .Uni(S32, {{Sgpr32}, {IntrId, SgprB32_M0}});
1643
1644 addRulesForIOpcs({amdgcn_flat_prefetch}).Any({{}, {{}, {IntrId, VgprP0}}});
1645
1646 addRulesForIOpcs({amdgcn_global_prefetch}).Any({{}, {{}, {IntrId, VgprP1}}});
1647
1648 addRulesForIOpcs({amdgcn_s_prefetch_data})
1650
1651 addRulesForIOpcs({amdgcn_class})
1652 .Any({{UniS1, _, S16}, {{UniInVcc}, {IntrId, Vgpr16, Vgpr32}}})
1653 .Any({{DivS1, _, S16}, {{Vcc}, {IntrId, Vgpr16, Vgpr32}}})
1654 .Any({{UniS1, _, S32}, {{UniInVcc}, {IntrId, Vgpr32, Vgpr32}}})
1655 .Any({{DivS1, _, S32}, {{Vcc}, {IntrId, Vgpr32, Vgpr32}}})
1656 .Any({{UniS1, _, S64}, {{UniInVcc}, {IntrId, Vgpr64, Vgpr32}}})
1657 .Any({{DivS1, _, S64}, {{Vcc}, {IntrId, Vgpr64, Vgpr32}}});
1658
1659 // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
1660 addRulesForIOpcs({amdgcn_end_cf})
1661 .Any({{_, UniS32}, {{}, {IntrId, Sgpr32}}})
1662 .Any({{_, UniS64}, {{}, {IntrId, Sgpr64}}});
1663
1664 addRulesForIOpcs({amdgcn_if_break}, Standard)
1665 .Uni(S64, {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
1666 .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
1667
1668 addRulesForIOpcs({amdgcn_exp})
1669 .Any({{_, _, _, S32, S32, S32, S32},
1670 {{}, {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32}}});
1671
1672 addRulesForIOpcs({amdgcn_exp_compr})
1673 .Any({{_, _, _, V2S16}, {{}, {IntrId, Imm, Imm, VgprV2S16, VgprV2S16}}});
1674
1675 addRulesForIOpcs({amdgcn_exp_row})
1676 .Any({{_, _, _, S32, S32, S32, S32, _, S32},
1677 {{},
1679 SgprB32_M0}}});
1680
1681 addRulesForIOpcs({amdgcn_lds_direct_load}, StandardB)
1682 .Div(B32, {{VgprB32}, {IntrId, SgprB32_M0}});
1683
1684 addRulesForIOpcs({amdgcn_lds_param_load}, Standard)
1685 .Div(S32, {{Vgpr32}, {IntrId, Imm, Imm, SgprB32_M0}});
1686
1687 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
1688 .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
1689
1690 addRulesForIOpcs({amdgcn_readfirstlane})
1691 .Any({{UniB32, _, DivB32}, {{}, {SgprB32, None, VgprB32}}})
1692 // This should not exist in the first place; it comes from call lowering
1693 // readfirstlane-ing just in case the register is not in an SGPR.
1694 .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
1695
1696 addRulesForIOpcs({amdgcn_readlane}, StandardB)
1698
1699 addRulesForIOpcs({amdgcn_writelane}, StandardB)
1700 .Div(B32,
1701 {{VgprB32},
1703
1704 addRulesForIOpcs({amdgcn_add_max_i32, amdgcn_add_max_u32, amdgcn_add_min_i32,
1705 amdgcn_add_min_u32},
1706 Standard)
1707 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1708 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1709
1710 addRulesForIOpcs({amdgcn_pk_add_max_i16, amdgcn_pk_add_max_u16,
1711 amdgcn_pk_add_min_i16, amdgcn_pk_add_min_u16},
1712 Standard)
1715
1716 addRulesForIOpcs({amdgcn_permlane16, amdgcn_permlanex16}, Standard)
1717 .Div(S32, {{Vgpr32},
1720
1721 addRulesForIOpcs({amdgcn_permlane_bcast, amdgcn_permlane_up,
1722 amdgcn_permlane_down, amdgcn_permlane_xor},
1723 StandardB)
1724 .Div(B32,
1725 {{VgprB32},
1727
1728 addRulesForIOpcs({amdgcn_permlane_idx_gen}, Standard)
1730
1731 addRulesForIOpcs({amdgcn_perm}, Standard)
1732 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1733 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1734
1735 addRulesForIOpcs(
1736 {amdgcn_wave_reduce_add, amdgcn_wave_reduce_and, amdgcn_wave_reduce_fadd,
1737 amdgcn_wave_reduce_fmax, amdgcn_wave_reduce_fmin,
1738 amdgcn_wave_reduce_fsub, amdgcn_wave_reduce_max, amdgcn_wave_reduce_min,
1739 amdgcn_wave_reduce_or, amdgcn_wave_reduce_sub, amdgcn_wave_reduce_umax,
1740 amdgcn_wave_reduce_umin, amdgcn_wave_reduce_xor},
1741 Standard)
1742 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}})
1743 .Div(S32, {{Sgpr32ToVgprDst}, {IntrId, VgprB32}})
1744 .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64}})
1745 .Div(S64, {{Sgpr64ToVgprDst}, {IntrId, VgprB64}});
1746
1747 addRulesForIOpcs({amdgcn_wave_shuffle}, Standard)
1748 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1749 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
1750
1751 addRulesForIOpcs({amdgcn_bitop3, amdgcn_fmad_ftz}, Standard)
1752 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1753 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1754 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1755 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1756
1757 addRulesForIOpcs({amdgcn_udot4, amdgcn_sdot4, amdgcn_udot8, amdgcn_sdot8,
1758 amdgcn_dot4_f32_bf8_bf8, amdgcn_dot4_f32_bf8_fp8,
1759 amdgcn_dot4_f32_fp8_fp8, amdgcn_dot4_f32_fp8_bf8},
1760 Standard)
1761 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1762 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1763
1764 addRulesForIOpcs({amdgcn_rsq, amdgcn_rsq_clamp}, Standard)
1765 .Uni(S16, {{Sgpr16}, {IntrId, Sgpr16}}, hasPST)
1766 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}}, !hasPST)
1767 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1768 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
1769 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST)
1770 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1771 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
1772 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});
1773
1774 addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24}, Standard)
1775 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1776 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
1777 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
1778 .Div(S64, {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});
1779
1780 addRulesForIOpcs({amdgcn_ds_bpermute, amdgcn_ds_bpermute_fi_b32,
1781 amdgcn_ds_permute, amdgcn_fmul_legacy, amdgcn_mulhi_i24,
1782 amdgcn_mulhi_u24},
1783 Standard)
1784 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1785 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
1786
1787 addRulesForIOpcs({amdgcn_cvt_sr_bf8_f32, amdgcn_cvt_sr_fp8_f32,
1788 amdgcn_cvt_sr_fp8_f32_e5m3, amdgcn_cvt_pk_bf8_f32,
1789 amdgcn_cvt_pk_fp8_f32, amdgcn_cvt_pk_fp8_f32_e5m3},
1790 Standard)
1791 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1792 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1793
1794 addRulesForIOpcs({amdgcn_cvt_off_f32_i4, amdgcn_cvt_f32_bf8,
1795 amdgcn_cvt_f32_fp8, amdgcn_cvt_f32_fp8_e5m3},
1796 Standard)
1797 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
1798 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
1799
1800 addRulesForIOpcs({amdgcn_cvt_pk_f32_bf8, amdgcn_cvt_pk_f32_fp8})
1801 .Any({{UniV2S32}, {{UniInVgprV2S32}, {IntrId, Vgpr32}}})
1802 .Any({{DivV2S32}, {{VgprV2S32}, {IntrId, Vgpr32}}});
1803
1804 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_fp8_f16})
1805 .Any({{DivS32},
1806 {{Vgpr32}, {IntrId, Vgpr32, Vgpr16, Vgpr32, Vgpr32, Imm}}});
1807
1808 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_fp8_f32})
1809 .Any({{DivS32},
1810 {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vgpr32, Imm}}});
1811
1812 addRulesForIOpcs({amdgcn_cubesc, amdgcn_cubetc, amdgcn_cubema, amdgcn_cubeid,
1813 amdgcn_fma_legacy},
1814 Standard)
1815 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1816 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1817
1818 addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract}, Standard)
1819 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
1820 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1821 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
1822 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1823 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
1824 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});
1825
1826 addRulesForIOpcs({amdgcn_prng_b32})
1827 .Any({{UniS32}, {{UniInVgprS32}, {IntrId, Vgpr32}}})
1828 .Any({{DivS32}, {{Vgpr32}, {IntrId, Vgpr32}}});
1829
1830 addRulesForIOpcs({amdgcn_sffbh}, Standard)
1831 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}})
1832 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
1833
1834 addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe}, Standard)
1835 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1836 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
1837 .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64, Sgpr32, Sgpr32}, S_BFE})
1838 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32, Vgpr32}, V_BFE});
1839
1840 addRulesForIOpcs({amdgcn_cvt_pk_i16, amdgcn_cvt_pk_u16, amdgcn_cvt_pknorm_i16,
1841 amdgcn_cvt_pknorm_u16, amdgcn_cvt_pkrtz},
1842 Standard)
1843 .Uni(V2S16, {{UniInVgprV2S16}, {IntrId, Vgpr32, Vgpr32}})
1844 .Div(V2S16, {{VgprV2S16}, {IntrId, Vgpr32, Vgpr32}});
1845
1846 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk32_bf6_f16,
1847 amdgcn_cvt_scalef32_sr_pk32_fp6_f16,
1848 amdgcn_cvt_scalef32_sr_pk32_bf6_bf16,
1849 amdgcn_cvt_scalef32_sr_pk32_fp6_bf16},
1850 Standard)
1852
1853 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk32_bf6_f32,
1854 amdgcn_cvt_scalef32_sr_pk32_fp6_f32},
1855 Standard)
1857
1858 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk_fp4_f16}, Standard)
1860 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, VgprV2S16, Vgpr32, Vgpr32}});
1861
1862 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk_fp4_f32}, Standard)
1864 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, VgprV2S32, Vgpr32, Vgpr32}});
1865
1866 addRulesForIOpcs({amdgcn_global_load_tr_b64})
1867 .Any({{DivB64, _, UniP1}, {{VgprB64}, {IntrId, SgprP1}}})
1868 .Any({{DivB64, _, DivP1}, {{VgprB64}, {IntrId, VgprP1}}})
1869 .Any({{DivB32, _, UniP1}, {{VgprB32}, {IntrId, SgprP1}}})
1870 .Any({{DivB32, _, DivP1}, {{VgprB32}, {IntrId, VgprP1}}});
1871
1872 addRulesForIOpcs({amdgcn_global_load_tr_b128})
1873 .Any({{DivB64, _, UniP1}, {{VgprB64}, {IntrId, SgprP1}}})
1874 .Any({{DivB64, _, DivP1}, {{VgprB64}, {IntrId, VgprP1}}})
1875 .Any({{DivB128, _, UniP1}, {{VgprB128}, {IntrId, SgprP1}}})
1876 .Any({{DivB128, _, DivP1}, {{VgprB128}, {IntrId, VgprP1}}});
1877
1878 addRulesForIOpcs({amdgcn_global_load_tr4_b64})
1879 .Any({{DivV2S32, _, UniP1}, {{VgprV2S32}, {IntrId, SgprP1}}})
1880 .Any({{DivV2S32, _, DivP1}, {{VgprV2S32}, {IntrId, VgprP1}}});
1881
1882 addRulesForIOpcs({amdgcn_global_load_tr6_b96})
1883 .Any({{DivV3S32, _, UniP1}, {{VgprV3S32}, {IntrId, SgprP1}}})
1884 .Any({{DivV3S32, _, DivP1}, {{VgprV3S32}, {IntrId, VgprP1}}});
1885
1886 addRulesForIOpcs({amdgcn_ds_load_tr4_b64, amdgcn_ds_load_tr8_b64})
1887 .Any({{DivV2S32}, {{VgprV2S32}, {IntrId, VgprP3}}});
1888
1889 addRulesForIOpcs({amdgcn_ds_load_tr6_b96})
1890 .Any({{DivV3S32}, {{VgprV3S32}, {IntrId, VgprP3}}});
1891
1892 addRulesForIOpcs({amdgcn_ds_load_tr16_b128})
1893 .Any({{DivB128}, {{VgprB128}, {IntrId, VgprP3}}});
1894
1895 addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
1896 .Any({{DivS64}, {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});
1897
1898 addRulesForIOpcs(
1899 {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num}, Standard)
1900 .Div(S32, {{Vgpr32}, {IntrId, VgprP1, Vgpr32}});
1901
1902 addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
1903 Standard)
1904 .Div(S32, {{Vgpr32}, {IntrId, VgprP0, Vgpr32}});
1905
1906 addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
1907 .Any({{_}, {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});
1908
1909 addRulesForIOpcs({amdgcn_struct_buffer_load_lds})
1910 .Any({{_},
1911 {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1912
1913 addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_lds})
1914 .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Sgpr32}}});
1915
1916 addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_lds})
1917 .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1918
1919 addRulesForIOpcs({amdgcn_global_load_lds})
1920 .Any({{}, {{}, {IntrId, VgprP1, SgprB32_M0}}});
1921
1922 addRulesForIOpcs({amdgcn_global_load_async_to_lds_b8,
1923 amdgcn_global_load_async_to_lds_b32,
1924 amdgcn_global_load_async_to_lds_b64,
1925 amdgcn_global_load_async_to_lds_b128,
1926 amdgcn_global_store_async_from_lds_b8,
1927 amdgcn_global_store_async_from_lds_b32,
1928 amdgcn_global_store_async_from_lds_b64,
1929 amdgcn_global_store_async_from_lds_b128})
1930 .Any({{}, {{}, {IntrId, VgprP1, VgprP3}}});
1931
1932 addRulesForIOpcs({amdgcn_cluster_load_b32})
1934 .Any({{DivB32, _, UniP1}, {{VgprB32}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
1935 .Any(
1936 {{DivB32, _, DivP1}, {{VgprB32}, {IntrId, VgprP1, Imm, SgprB32_M0}}});
1937
1938 addRulesForIOpcs({amdgcn_cluster_load_b64})
1940 .Any({{DivB64, _, UniP1}, {{VgprB64}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
1941 .Any(
1942 {{DivB64, _, DivP1}, {{VgprB64}, {IntrId, VgprP1, Imm, SgprB32_M0}}});
1943
1944 addRulesForIOpcs({amdgcn_cluster_load_b128})
1946 .Any({{DivB128, _, UniP1},
1947 {{VgprB128}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
1948 .Any({{DivB128, _, DivP1},
1949 {{VgprB128}, {IntrId, VgprP1, Imm, SgprB32_M0}}});
1950
1951 addRulesForIOpcs({amdgcn_cluster_load_async_to_lds_b8,
1952 amdgcn_cluster_load_async_to_lds_b32,
1953 amdgcn_cluster_load_async_to_lds_b64,
1954 amdgcn_cluster_load_async_to_lds_b128})
1955 .Any({{}, {{}, {IntrId, VgprP1, VgprP3, Imm, Imm, SgprB32_M0}}});
1956
1957 addRulesForIOpcs({amdgcn_perm_pk16_b4_u4}, StandardB)
1958 .Uni(B64, {{UniInVgprB64}, {IntrId, Vgpr32, Vgpr32, VgprV2S32}})
1959 .Div(B64, {{VgprB64}, {IntrId, Vgpr32, Vgpr32, VgprV2S32}});
1960
1961 addRulesForIOpcs({amdgcn_perm_pk16_b6_u4}, StandardB)
1963 .Div(B96, {{VgprB96}, {IntrId, Vgpr32, VgprB64, VgprV2S32}});
1964
1965 addRulesForIOpcs({amdgcn_perm_pk16_b8_u4}, StandardB)
1967 .Div(B128, {{VgprB128}, {IntrId, VgprB64, VgprB64, VgprV2S32}});
1968
1969 addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm, amdgcn_wqm, amdgcn_softwqm,
1970 amdgcn_strict_wqm},
1971 StandardB)
1972 .Div(B32, {{VgprB32}, {IntrId, VgprB32}})
1973 .Uni(B32, {{SgprB32}, {IntrId, SgprB32}})
1974 .Div(B64, {{VgprB64}, {IntrId, VgprB64}})
1975 .Uni(B64, {{SgprB64}, {IntrId, SgprB64}})
1976 .Div(B96, {{VgprB96}, {IntrId, VgprB96}})
1977 .Uni(B96, {{SgprB96}, {IntrId, SgprB96}})
1978 .Div(B128, {{VgprB128}, {IntrId, VgprB128}})
1979 .Uni(B128, {{SgprB128}, {IntrId, SgprB128}})
1980 .Any({{UniB256}, {{SgprB256}, {IntrId, SgprB256}}})
1981 .Any({{DivB256}, {{VgprB256}, {IntrId, VgprB256}}})
1982 .Any({{UniB512}, {{SgprB512}, {IntrId, SgprB512}}})
1983 .Any({{DivB512}, {{VgprB512}, {IntrId, VgprB512}}});
1984
1985 addRulesForIOpcs({amdgcn_kill, amdgcn_wqm_demote})
1986 .Any({{}, {{}, {IntrId, Vcc}}});
1987
1988 addRulesForIOpcs({amdgcn_ballot}, Standard)
1989 .Uni(S64, {{Sgpr64}, {IntrId, Vcc}})
1990 .Uni(S32, {{Sgpr32}, {IntrId, Vcc}});
1991
1992 addRulesForIOpcs({amdgcn_inverse_ballot})
1993 .Any({{DivS1, _, S32}, {{Vcc}, {IntrId, SgprB32_ReadFirstLane}}})
1994 .Any({{DivS1, _, S64}, {{Vcc}, {IntrId, SgprB64_ReadFirstLane}}});
1995
1996 addRulesForIOpcs({amdgcn_live_mask, amdgcn_ps_live})
1997 .Any({{DivS1}, {{Vcc}, {}}});
1998
1999 addRulesForIOpcs({amdgcn_mov_dpp, amdgcn_mov_dpp8}, StandardB)
2000 .Div(B32, {{VgprB32}, {IntrId, VgprB32}})
2001 .Div(B64, {{VgprB64}, {IntrId, VgprB64}});
2002
2003 addRulesForIOpcs({amdgcn_update_dpp}, StandardB)
2004 .Div(B32, {{VgprB32}, {IntrId, VgprB32, VgprB32}})
2005 .Div(B64, {{VgprB64}, {IntrId, VgprB64, VgprB64}});
2006
2007 addRulesForIOpcs({amdgcn_sin, amdgcn_cos}, Standard)
2008 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
2009 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
2010 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
2011 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}});
2012
2013 addRulesForIOpcs({amdgcn_trig_preop}, Standard)
2014 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32}})
2015 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr32}});
2016
2017 addRulesForIOpcs({amdgcn_exp2}, Standard)
2018 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
2019 .Uni(S16, {{Sgpr16}, {IntrId, Sgpr16}}, hasPST)
2020 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}}, !hasPST)
2021 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
2022 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
2023 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST);
2024
2025 addRulesForIOpcs({amdgcn_rcp, amdgcn_sqrt}, Standard)
2026 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
2027 .Uni(S16, {{Sgpr16}, {IntrId, Sgpr16}}, hasPST)
2028 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}}, !hasPST)
2029 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
2030 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
2031 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST)
2032 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}})
2033 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}});
2034
2035 addRulesForIOpcs({amdgcn_log}, Standard)
2036 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
2037 .Uni(S16, {{Sgpr16}, {IntrId, Sgpr16}}, hasPST)
2038 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}}, !hasPST)
2039 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
2040 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
2041 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST);
2042
2043 addRulesForIOpcs({amdgcn_ds_atomic_async_barrier_arrive_b64})
2044 .Any({{}, {{}, {IntrId, VgprP3}}});
2045
2046 addRulesForIOpcs({amdgcn_ds_atomic_barrier_arrive_rtn_b64}, Standard)
2047 .Div(S64, {{Vgpr64}, {IntrId, VgprP3, Vgpr64}});
2048
2049 addRulesForIOpcs({amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
2050 Standard)
2051 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
2052 .Div(S64, {{Vgpr64}, {IntrId, Vgpr32}});
2053
2054 addRulesForIOpcs({amdgcn_ds_append, amdgcn_ds_consume}, Standard)
2055 .Uni(S32, {{UniInVgprS32}, {IntrId, SgprB32_M0}})
2056 .Div(S32, {{Vgpr32}, {IntrId, SgprB32_M0}});
2057
2058 addRulesForIOpcs(
2059 {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn}, Standard)
2060 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV4S32}});
2061
2062 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop1_rtn}, Standard)
2063 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
2064
2065 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop2_rtn}, Standard)
2066 .Div(S64, {{Vgpr64, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
2067
2068 addRulesForIOpcs({amdgcn_ds_gws_sema_p, amdgcn_ds_gws_sema_v,
2069 amdgcn_ds_gws_sema_release_all})
2070 .Any({{}, {{}, {IntrId, SgprB32_M0}}});
2071
2072 addRulesForIOpcs(
2073 {amdgcn_ds_gws_barrier, amdgcn_ds_gws_init, amdgcn_ds_gws_sema_br})
2074 .Any({{}, {{}, {IntrId, Vgpr32, SgprB32_M0}}});
2075
2076 addRulesForIOpcs({amdgcn_ds_ordered_add, amdgcn_ds_ordered_swap}, Standard)
2077 .Div(S32, {{Vgpr32}, {IntrId, SgprB32_M0, Vgpr32}});
2078
2079 addRulesForIOpcs({amdgcn_ds_swizzle}, Standard)
2080 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
2081 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
2082
2083 addRulesForIOpcs({amdgcn_permlane16_var, amdgcn_permlanex16_var}, Standard)
2084 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2085
2086 addRulesForIOpcs({amdgcn_permlane16_swap, amdgcn_permlane32_swap}, Standard)
2087 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
2088
2089 addRulesForIOpcs({amdgcn_permlane64}, StandardB)
2090 .Div(B32, {{VgprB32}, {IntrId, VgprB32}});
2091
2092 addRulesForIOpcs({amdgcn_ds_read_tr4_b64, amdgcn_ds_read_tr8_b64})
2093 .Any({{DivV2S32}, {{VgprV2S32}, {IntrId, VgprP3}}});
2094
2095 addRulesForIOpcs({amdgcn_ds_read_tr6_b96})
2096 .Any({{DivV3S32}, {{VgprV3S32}, {IntrId, VgprP3}}});
2097
2098 addRulesForIOpcs({amdgcn_ds_read_tr16_b64})
2099 .Any({{DivV4S16}, {{VgprV4S16}, {IntrId, VgprP3}}});
2100
2101 addRulesForIOpcs({amdgcn_interp_p1}, Standard)
2102 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Imm, Imm, SgprB32_M0}});
2103
2104 addRulesForIOpcs({amdgcn_interp_p1_f16}, Standard)
2105 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Imm, Imm, Imm, SgprB32_M0}});
2106
2107 addRulesForIOpcs({amdgcn_interp_p2}, Standard)
2108 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Imm, Imm, SgprB32_M0}});
2109
2110 addRulesForIOpcs({amdgcn_interp_p2_f16}, Standard)
2111 .Div(S16,
2113
2114 addRulesForIOpcs({amdgcn_interp_mov}, Standard)
2115 .Div(S32, {{Vgpr32}, {IntrId, Imm, Imm, Imm, SgprB32_M0}});
2116
2117 addRulesForIOpcs({amdgcn_interp_inreg_p10, amdgcn_interp_inreg_p2,
2118 amdgcn_interp_inreg_p10_f16, amdgcn_interp_p10_rtz_f16},
2119 Standard)
2120 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2121 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2122
2123 addRulesForIOpcs({amdgcn_interp_inreg_p2_f16, amdgcn_interp_p2_rtz_f16},
2124 Standard)
2125 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2126 .Div(S16, {{Vgpr16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2127
2128 addRulesForIOpcs({amdgcn_div_fmas}, Standard)
2129 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vcc}})
2130 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vcc}})
2131 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr64, Vgpr64, Vcc}})
2132 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr64, Vgpr64, Vcc}});
2133
2134 addRulesForIOpcs({amdgcn_div_fixup}, Standard)
2135 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
2136 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
2137 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2138 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2139 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr64, Vgpr64}})
2140 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr64, Vgpr64}});
2141
2142 addRulesForIOpcs({amdgcn_div_scale}, Standard)
2143 .Div(S32, {{Vgpr32, Vcc}, {IntrId, Vgpr32, Vgpr32}})
2144 .Uni(S32, {{UniInVgprS32, UniInVcc}, {IntrId, Vgpr32, Vgpr32}})
2145 .Div(S64, {{Vgpr64, Vcc}, {IntrId, Vgpr64, Vgpr64}})
2146 .Uni(S64, {{UniInVgprS64, UniInVcc}, {IntrId, Vgpr64, Vgpr64}});
2147
2148 addRulesForIOpcs({amdgcn_fdot2, amdgcn_sdot2, amdgcn_udot2}, Standard)
2150 .Div(S32, {{Vgpr32}, {IntrId, VgprV2S16, VgprV2S16, Vgpr32}});
2151
2152 addRulesForIOpcs({amdgcn_fdot2_f16_f16}, Standard)
2154 .Div(S16, {{Vgpr16}, {IntrId, VgprV2S16, VgprV2S16, Vgpr16}});
2155
2156 addRulesForIOpcs({amdgcn_sudot4, amdgcn_sudot8}, Standard)
2157 .Uni(S32, {{UniInVgprS32}, {IntrId, Imm, Vgpr32, Imm, Vgpr32, Vgpr32}})
2158 .Div(S32, {{Vgpr32}, {IntrId, Imm, Vgpr32, Imm, Vgpr32, Vgpr32}});
2159
2160 addRulesForIOpcs({amdgcn_s_alloc_vgpr})
2162
2163 addRulesForIOpcs({amdgcn_sat_pk4_i4_i8, amdgcn_sat_pk4_u4_u8}, Standard)
2164 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr32}})
2165 .Div(S16, {{Vgpr16}, {IntrId, Vgpr32}});
2166
2167 // TODO: Add handling for GFX90A+ which should use VGPRs instead of AGPRs.
2168 bool HasGFX90AInsts = ST->hasGFX90AInsts();
2169 addRulesForIOpcs({amdgcn_mfma_f32_32x32x1f32, amdgcn_mfma_f32_16x16x1f32,
2170 amdgcn_mfma_f32_4x4x1f32, amdgcn_mfma_f32_32x32x2f32,
2171 amdgcn_mfma_f32_16x16x4f32, amdgcn_mfma_f32_32x32x4f16,
2172 amdgcn_mfma_f32_16x16x4f16, amdgcn_mfma_f32_4x4x4f16,
2173 amdgcn_mfma_f32_32x32x8f16, amdgcn_mfma_f32_16x16x16f16,
2174 amdgcn_mfma_i32_32x32x4i8, amdgcn_mfma_i32_16x16x4i8,
2175 amdgcn_mfma_i32_4x4x4i8, amdgcn_mfma_i32_32x32x8i8,
2176 amdgcn_mfma_i32_16x16x16i8, amdgcn_mfma_f32_32x32x2bf16,
2177 amdgcn_mfma_f32_16x16x2bf16, amdgcn_mfma_f32_4x4x2bf16,
2178 amdgcn_mfma_f32_32x32x4bf16, amdgcn_mfma_f32_16x16x8bf16})
2179 .Any({{DivAnyTy},
2181 !HasGFX90AInsts);
2182
2183 // WMMA/SWMMAC intrinsics: all register operands map to VGPR.
2184 addRulesForIOpcs(
2185 {// WMMA GFX11+
2186 amdgcn_wmma_f32_16x16x16_f16, amdgcn_wmma_f32_16x16x16_bf16,
2187 amdgcn_wmma_f16_16x16x16_f16, amdgcn_wmma_bf16_16x16x16_bf16,
2188 amdgcn_wmma_f16_16x16x16_f16_tied, amdgcn_wmma_bf16_16x16x16_bf16_tied,
2189 amdgcn_wmma_i32_16x16x16_iu8, amdgcn_wmma_i32_16x16x16_iu4,
2190 // WMMA GFX12
2191 amdgcn_wmma_f32_16x16x16_fp8_fp8, amdgcn_wmma_f32_16x16x16_fp8_bf8,
2192 amdgcn_wmma_f32_16x16x16_bf8_fp8, amdgcn_wmma_f32_16x16x16_bf8_bf8,
2193 amdgcn_wmma_i32_16x16x32_iu4,
2194 // WMMA GFX1250
2195 amdgcn_wmma_f32_16x16x4_f32, amdgcn_wmma_f32_16x16x32_bf16,
2196 amdgcn_wmma_f32_16x16x32_f16, amdgcn_wmma_f16_16x16x32_f16,
2197 amdgcn_wmma_bf16_16x16x32_bf16, amdgcn_wmma_bf16f32_16x16x32_bf16,
2198 amdgcn_wmma_f32_16x16x64_fp8_fp8, amdgcn_wmma_f32_16x16x64_fp8_bf8,
2199 amdgcn_wmma_f32_16x16x64_bf8_fp8, amdgcn_wmma_f32_16x16x64_bf8_bf8,
2200 amdgcn_wmma_f16_16x16x64_fp8_fp8, amdgcn_wmma_f16_16x16x64_fp8_bf8,
2201 amdgcn_wmma_f16_16x16x64_bf8_fp8, amdgcn_wmma_f16_16x16x64_bf8_bf8,
2202 amdgcn_wmma_f16_16x16x128_fp8_fp8, amdgcn_wmma_f16_16x16x128_fp8_bf8,
2203 amdgcn_wmma_f16_16x16x128_bf8_fp8, amdgcn_wmma_f16_16x16x128_bf8_bf8,
2204 amdgcn_wmma_f32_16x16x128_fp8_fp8, amdgcn_wmma_f32_16x16x128_fp8_bf8,
2205 amdgcn_wmma_f32_16x16x128_bf8_fp8, amdgcn_wmma_f32_16x16x128_bf8_bf8,
2206 amdgcn_wmma_i32_16x16x64_iu8, amdgcn_wmma_f32_16x16x128_f8f6f4,
2207 amdgcn_wmma_scale_f32_16x16x128_f8f6f4,
2208 amdgcn_wmma_scale16_f32_16x16x128_f8f6f4, amdgcn_wmma_f32_32x16x128_f4,
2209 amdgcn_wmma_scale_f32_32x16x128_f4, amdgcn_wmma_scale16_f32_32x16x128_f4,
2210 // SWMMAC GFX12
2211 amdgcn_swmmac_f32_16x16x32_f16, amdgcn_swmmac_f32_16x16x32_bf16,
2212 amdgcn_swmmac_f16_16x16x32_f16, amdgcn_swmmac_bf16_16x16x32_bf16,
2213 amdgcn_swmmac_i32_16x16x32_iu8, amdgcn_swmmac_i32_16x16x32_iu4,
2214 amdgcn_swmmac_i32_16x16x64_iu4, amdgcn_swmmac_f32_16x16x32_fp8_fp8,
2215 amdgcn_swmmac_f32_16x16x32_fp8_bf8, amdgcn_swmmac_f32_16x16x32_bf8_fp8,
2216 amdgcn_swmmac_f32_16x16x32_bf8_bf8,
2217 // SWMMAC GFX1250
2218 amdgcn_swmmac_f32_16x16x64_f16, amdgcn_swmmac_f32_16x16x64_bf16,
2219 amdgcn_swmmac_f16_16x16x64_f16, amdgcn_swmmac_bf16_16x16x64_bf16,
2220 amdgcn_swmmac_bf16f32_16x16x64_bf16, amdgcn_swmmac_f32_16x16x128_fp8_fp8,
2221 amdgcn_swmmac_f32_16x16x128_fp8_bf8, amdgcn_swmmac_f32_16x16x128_bf8_fp8,
2222 amdgcn_swmmac_f32_16x16x128_bf8_bf8, amdgcn_swmmac_f16_16x16x128_fp8_fp8,
2223 amdgcn_swmmac_f16_16x16x128_fp8_bf8, amdgcn_swmmac_f16_16x16x128_bf8_fp8,
2224 amdgcn_swmmac_f16_16x16x128_bf8_bf8, amdgcn_swmmac_i32_16x16x128_iu8})
2225 .Any({{}, {{}, {}, ApplyAllVgpr}});
2226
2227} // end initialize rules
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
constexpr LLT S16
constexpr LLT S1
constexpr LLT V2S16
constexpr LLT S32
constexpr LLT V4S32
constexpr LLT V3S32
constexpr LLT S64
constexpr LLT V2S32
constexpr LLT S128
UniformityLLTOpPredicateID LLTToBId(LLT Ty)
bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, const MachineUniformityInfo &MUI, const MachineRegisterInfo &MRI)
UniformityLLTOpPredicateID LLTToId(LLT Ty)
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
Register Reg
Register const TargetRegisterInfo * TRI
Machine IR instance of the generic uniformity analysis.
bool operator()(const MachineInstr &MI) const
Predicate operator||(const Predicate &RHS) const
Predicate operator&&(const Predicate &RHS) const
Predicate(std::function< bool(const MachineInstr &)> Pred)
Predicate operator!() const
RegBankLegalizeRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
const SetOfRulesForOpcode * getRulesForOpc(MachineInstr &MI) const
const RegBankLLTMapping * findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineUniformityInfo &MUI) const
void addFastRuleDivergent(UniformityLLTOpPredicateID Ty, RegBankLLTMapping RuleApplyIDs)
void addFastRuleUniform(UniformityLLTOpPredicateID Ty, RegBankLLTMapping RuleApplyIDs)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
bool isSigned() const
Definition InstrTypes.h:993
bool isDivergentAtDef(ConstValueRefT V) const
Whether V is divergent at its definition.
bool isUniformAtDef(ConstValueRefT V) const
Whether V is uniform/non-divergent at its definition.
bool isEquality() const
Return true if this predicate is either EQ or NE.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
TypeSize getValue() const
Representation of each machine instruction.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
unsigned getAddrSpace() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
const TargetRegisterInfo * getTargetRegisterInfo() const
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void swap(SmallVectorImpl &RHS)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
bool isAnyPtr(LLT Ty, unsigned Width)
bool isUniformMMO(const MachineMemOperand *MMO)
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
Definition SIInstrInfo.h:44
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
SmallVector< UniformityLLTOpPredicateID, 4 > OpUniformityAndTypes
PredicateMapping(std::initializer_list< UniformityLLTOpPredicateID > OpList, std::function< bool(const MachineInstr &)> TestFunc=nullptr)
bool match(const MachineInstr &MI, const MachineUniformityInfo &MUI, const MachineRegisterInfo &MRI) const
std::function< bool(const MachineInstr &)> TestFunc
RegBankLLTMapping(std::initializer_list< RegBankLLTMappingApplyID > DstOpMappingList, std::initializer_list< RegBankLLTMappingApplyID > SrcOpMappingList, LoweringMethodID LoweringMethod=DoNotLower)
SmallVector< RegBankLLTMappingApplyID, 2 > DstOpMapping
SmallVector< RegBankLLTMappingApplyID, 4 > SrcOpMapping
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39