AMDGPURegBankLegalizeRules.cpp
1//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// Definitions of RegBankLegalize Rules for all opcodes.
10/// Implementation of the container for all the Rules and of the rule search.
11/// Fast search for the most common case, where Rule.Predicate checks the LLT
12/// and uniformity of the register in operand 0.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPURegBankLegalizeRules.h"
17#include "AMDGPUInstrInfo.h"
18#include "GCNSubtarget.h"
19#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20#include "llvm/CodeGen/MachineUniformityAnalysis.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/Support/AMDGPUAddrSpace.h"
23
24#define DEBUG_TYPE "amdgpu-regbanklegalize"
25
26using namespace llvm;
27using namespace AMDGPU;
28
29bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
30 return Ty.isPointer() && Ty.getSizeInBits() == Width;
31}
32
33RegBankLLTMapping::RegBankLLTMapping(
34 std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
35 std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
36 LoweringMethodID LoweringMethod)
37 : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
38 LoweringMethod(LoweringMethod) {}
39
40PredicateMapping::PredicateMapping(
41 std::initializer_list<UniformityLLTOpPredicateID> OpList,
42 std::function<bool(const MachineInstr &)> TestFunc)
43 : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}
44
45bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
46 const MachineUniformityInfo &MUI,
47 const MachineRegisterInfo &MRI) {
48 switch (UniID) {
49 case S1:
50 return MRI.getType(Reg) == LLT::scalar(1);
51 case S16:
52 return MRI.getType(Reg) == LLT::scalar(16);
53 case S32:
54 return MRI.getType(Reg) == LLT::scalar(32);
55 case S64:
56 return MRI.getType(Reg) == LLT::scalar(64);
57 case S128:
58 return MRI.getType(Reg) == LLT::scalar(128);
59 case P0:
60 return MRI.getType(Reg) == LLT::pointer(0, 64);
61 case P1:
62 return MRI.getType(Reg) == LLT::pointer(1, 64);
63 case P2:
64 return MRI.getType(Reg) == LLT::pointer(2, 32);
65 case P3:
66 return MRI.getType(Reg) == LLT::pointer(3, 32);
67 case P4:
68 return MRI.getType(Reg) == LLT::pointer(4, 64);
69 case P5:
70 return MRI.getType(Reg) == LLT::pointer(5, 32);
71 case P8:
72 return MRI.getType(Reg) == LLT::pointer(8, 128);
73 case Ptr32:
74 return isAnyPtr(MRI.getType(Reg), 32);
75 case Ptr64:
76 return isAnyPtr(MRI.getType(Reg), 64);
77 case Ptr128:
78 return isAnyPtr(MRI.getType(Reg), 128);
79 case V2S32:
80 return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
81 case V3S32:
82 return MRI.getType(Reg) == LLT::fixed_vector(3, 32);
83 case V4S32:
84 return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
85 case B32:
86 return MRI.getType(Reg).getSizeInBits() == 32;
87 case B64:
88 return MRI.getType(Reg).getSizeInBits() == 64;
89 case B96:
90 return MRI.getType(Reg).getSizeInBits() == 96;
91 case B128:
92 return MRI.getType(Reg).getSizeInBits() == 128;
93 case B256:
94 return MRI.getType(Reg).getSizeInBits() == 256;
95 case B512:
96 return MRI.getType(Reg).getSizeInBits() == 512;
97 case UniS1:
98 return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
99 case UniS16:
100 return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
101 case UniS32:
102 return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
103 case UniS64:
104 return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
105 case UniS128:
106 return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
107 case UniP0:
108 return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
109 case UniP1:
110 return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
111 case UniP2:
112 return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isUniform(Reg);
113 case UniP3:
114 return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
115 case UniP4:
116 return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
117 case UniP5:
118 return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
119 case UniP8:
120 return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
121 case UniPtr32:
122 return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
123 case UniPtr64:
124 return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
125 case UniPtr128:
126 return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
127 case UniV2S16:
128 return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
129 case UniV2S32:
130 return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
131 case UniB32:
132 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
133 case UniB64:
134 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
135 case UniB96:
136 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
137 case UniB128:
138 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
139 case UniB256:
140 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
141 case UniB512:
142 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
143 case DivS1:
144 return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
145 case DivS16:
146 return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
147 case DivS32:
148 return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
149 case DivS64:
150 return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
151 case DivS128:
152 return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
153 case DivP0:
154 return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
155 case DivP1:
156 return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
157 case DivP2:
158 return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isDivergent(Reg);
159 case DivP3:
160 return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
161 case DivP4:
162 return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
163 case DivP5:
164 return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
165 case DivPtr32:
166 return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
167 case DivPtr64:
168 return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
169 case DivPtr128:
170 return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
171 case DivV2S16:
172 return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
173 case DivV2S32:
174 return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isDivergent(Reg);
175 case DivB32:
176 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
177 case DivB64:
178 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
179 case DivB96:
180 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
181 case DivB128:
182 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
183 case DivB256:
184 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
185 case DivB512:
186 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
187 case _:
188 return true;
189 default:
190 llvm_unreachable("missing matchUniformityAndLLT");
191 }
192}
193
194bool PredicateMapping::match(const MachineInstr &MI,
195 const MachineUniformityInfo &MUI,
196 const MachineRegisterInfo &MRI) const {
197 // Check the uniformity and LLT signature of each listed operand.
198 for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
199 if (OpUniformityAndTypes[i] == _) {
200 if (MI.getOperand(i).isReg())
201 return false;
202 continue;
203 }
204
205 // Remaining IDs check registers.
206 if (!MI.getOperand(i).isReg())
207 return false;
208
209 if (!matchUniformityAndLLT(MI.getOperand(i).getReg(),
210 OpUniformityAndTypes[i], MUI, MRI))
211 return false;
212 }
213
214 // More complex check.
215 if (TestFunc)
216 return TestFunc(MI);
217
218 return true;
219}
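// Illustrative note (added for clarity, not part of the upstream source): a
// signature such as {DivS1, _, S64} matches an instruction whose operand 0 is
// a divergent s1 register, whose operand 1 is a non-register operand (matched
// by '_', e.g. the predicate immediate of a G_ICMP) and whose operand 2 is a
// 64-bit scalar register; operands beyond the listed ones are not checked. An
// optional TestFunc, such as isEqualityICmp used below, further restricts the
// match.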
220
220
221SetOfRulesForOpcode::SetOfRulesForOpcode() {}
222
223SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
224 : FastTypes(FastTypes) {}
225
226UniformityLLTOpPredicateID LLTToId(LLT Ty) {
227 if (Ty == LLT::scalar(16))
228 return S16;
229 if (Ty == LLT::scalar(32))
230 return S32;
231 if (Ty == LLT::scalar(64))
232 return S64;
233 if (Ty == LLT::fixed_vector(2, 16))
234 return V2S16;
235 if (Ty == LLT::fixed_vector(2, 32))
236 return V2S32;
237 if (Ty == LLT::fixed_vector(3, 32))
238 return V3S32;
239 if (Ty == LLT::fixed_vector(4, 32))
240 return V4S32;
241 return _;
242}
243
244UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
245 if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
246 isAnyPtr(Ty, 32))
247 return B32;
248 if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
249 Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
250 return B64;
251 if (Ty == LLT::fixed_vector(3, 32))
252 return B96;
253 if (Ty == LLT::fixed_vector(4, 32) || isAnyPtr(Ty, 128))
254 return B128;
255 return _;
256}
257
258const RegBankLLTMapping *
259SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
260 const MachineRegisterInfo &MRI,
261 const MachineUniformityInfo &MUI) const {
262 // Search in "Fast Rules".
263 // Note: if fast rules are enabled, a RegBankLLTMapping must be added to every
264 // slot that could match the fast predicate; otherwise InvalidMapping is
265 // returned, which results in failure, and the "Slow Rules" are not searched.
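 // For illustration (added note, not part of the upstream source): with
 // Standard fast rules, getFastPredicateSlot maps S32->0, S16->1, S64->2 and
 // V2S16->3, so a rule set built with .Uni(S32, ...) and .Div(S32, ...) fills
 // Uni[0] and Div[0]. A G_ADD whose operand 0 is a uniform s32 is then
 // resolved directly from Uni[0] without scanning the slower Rules list.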
266 if (FastTypes != NoFastRules) {
267 Register Reg = MI.getOperand(0).getReg();
268 int Slot;
269 if (FastTypes == StandardB)
270 Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
271 else
272 Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));
273
274 if (Slot != -1)
275 return MUI.isUniform(Reg) ? &Uni[Slot] : &Div[Slot];
276 }
277
278 // Slow search for more complex rules.
279 for (const RegBankLegalizeRule &Rule : Rules) {
280 if (Rule.Predicate.match(MI, MUI, MRI))
281 return &Rule.OperandMapping;
282 }
283
284 return nullptr;
285}
286
287void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
288 Rules.push_back(Rule);
289}
290
291void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
292 RegBankLLTMapping RuleApplyIDs) {
293 int Slot = getFastPredicateSlot(Ty);
294 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
295 Div[Slot] = RuleApplyIDs;
296}
297
298void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
299 RegBankLLTMapping RuleApplyIDs) {
300 int Slot = getFastPredicateSlot(Ty);
301 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
302 Uni[Slot] = RuleApplyIDs;
303}
304
305int SetOfRulesForOpcode::getFastPredicateSlot(
306 UniformityLLTOpPredicateID Ty) const {
307 switch (FastTypes) {
308 case Standard: {
309 switch (Ty) {
310 case S32:
311 return 0;
312 case S16:
313 return 1;
314 case S64:
315 return 2;
316 case V2S16:
317 return 3;
318 default:
319 return -1;
320 }
321 }
322 case StandardB: {
323 switch (Ty) {
324 case B32:
325 return 0;
326 case B64:
327 return 1;
328 case B96:
329 return 2;
330 case B128:
331 return 3;
332 default:
333 return -1;
334 }
335 }
336 case Vector: {
337 switch (Ty) {
338 case S32:
339 return 0;
340 case V2S32:
341 return 1;
342 case V3S32:
343 return 2;
344 case V4S32:
345 return 3;
346 default:
347 return -1;
348 }
349 }
350 default:
351 return -1;
352 }
353}
354
355RegBankLegalizeRules::RuleSetInitializer
356RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
357 FastRulesTypes FastTypes) {
358 return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
359}
360
361RegBankLegalizeRules::RuleSetInitializer
362RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
363 FastRulesTypes FastTypes) {
364 return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
365}
366
367const SetOfRulesForOpcode *
368RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
369 unsigned Opc = MI.getOpcode();
370 if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
371 Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
372 Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
373 unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
374 auto IRAIt = IRulesAlias.find(IntrID);
375 if (IRAIt == IRulesAlias.end())
376 return nullptr;
377 return &IRules.at(IRAIt->second);
378 }
379
380 auto GRAIt = GRulesAlias.find(Opc);
381 if (GRAIt == GRulesAlias.end())
382 return nullptr;
383 return &GRules.at(GRAIt->second);
384}
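// Added note (not part of the upstream source): both lookups go through an
// alias map, so every opcode (or intrinsic ID) passed to addRulesForGOpcs or
// addRulesForIOpcs shares a single SetOfRulesForOpcode; for example
// {G_ADD, G_SUB} below are registered once and resolve to the same rule set.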
385
386// Syntactic-sugar wrapper for a predicate lambda that enables '&&', '||' and '!'.
387class Predicate {
388private:
389 struct Elt {
390 // Save a formula composed of Pred, '&&', '||' and '!' as a jump table.
391 // '!' is sunk onto each Pred, e.g. !((A && !B) || C) -> (!A || B) && !C.
392 // Sequences of && and || are represented by jumps, for example:
393 // (A && B && ... X) or (A && B && ... X) || Y
394 //   A == true: jump to B
395 //   A == false: jump to the end or to Y; the result is A (false) or Y
396 // (A || B || ... X) or (A || B || ... X) && Y
397 //   A == true: jump to the end or to Y; the result is A (true) or Y
398 //   A == false: jump to B
399 // To negate an expression, simply flip Neg on each Pred and swap TJumpOffset
400 // with FJumpOffset (&& becomes ||, || becomes &&); see the example below.
401 std::function<bool(const MachineInstr &)> Pred;
402 bool Neg; // Neg of Pred is calculated before jump
403 unsigned TJumpOffset;
404 unsigned FJumpOffset;
405 };
406
407 SmallVector<Elt, 8> Expression;
408
409 Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };
410
411public:
412 Predicate(std::function<bool(const MachineInstr &)> Pred) {
413 Expression.push_back({Pred, false, 1, 1});
414 };
415
416 bool operator()(const MachineInstr &MI) const {
417 unsigned Idx = 0;
418 unsigned ResultIdx = Expression.size();
419 bool Result;
420 do {
421 Result = Expression[Idx].Pred(MI);
422 Result = Expression[Idx].Neg ? !Result : Result;
423 if (Result) {
424 Idx += Expression[Idx].TJumpOffset;
425 } else {
426 Idx += Expression[Idx].FJumpOffset;
427 }
428 } while ((Idx != ResultIdx));
429
430 return Result;
431 };
432
433 Predicate operator!() const {
434 SmallVector<Elt, 8> NegExpression;
435 for (const Elt &ExprElt : Expression) {
436 NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
437 ExprElt.TJumpOffset});
438 }
439 return Predicate(std::move(NegExpression));
440 };
441
442 Predicate operator&&(const Predicate &RHS) const {
443 SmallVector<Elt, 8> AndExpression = Expression;
444
445 unsigned RHSSize = RHS.Expression.size();
446 unsigned ResultIdx = Expression.size();
447 for (unsigned i = 0; i < ResultIdx; ++i) {
448 // LHS results in false, whole expression results in false.
449 if (i + AndExpression[i].FJumpOffset == ResultIdx)
450 AndExpression[i].FJumpOffset += RHSSize;
451 }
452
453 AndExpression.append(RHS.Expression);
454
455 return Predicate(std::move(AndExpression));
456 }
457
458 Predicate operator||(const Predicate &RHS) const {
459 SmallVector<Elt, 8> OrExpression = Expression;
460
461 unsigned RHSSize = RHS.Expression.size();
462 unsigned ResultIdx = Expression.size();
463 for (unsigned i = 0; i < ResultIdx; ++i) {
464 // LHS results in true, whole expression results in true.
465 if (i + OrExpression[i].TJumpOffset == ResultIdx)
466 OrExpression[i].TJumpOffset += RHSSize;
467 }
468
469 OrExpression.append(RHS.Expression);
470
471 return Predicate(std::move(OrExpression));
472 }
473};
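// Worked example (added for clarity, not part of the upstream source). For
// predicates A and B built from single lambdas, A && B is encoded as:
//   Expression[0] = {A, Neg=false, TJumpOffset=1, FJumpOffset=2}
//   Expression[1] = {B, Neg=false, TJumpOffset=1, FJumpOffset=1}
// Evaluation starts at index 0 with ResultIdx == 2: if A is true we fall
// through to B and the overall result is B; if A is false we jump straight to
// ResultIdx and the overall result is false. operator|| instead extends
// TJumpOffset so that a true LHS skips the RHS, and operator! flips Neg and
// swaps the two offsets, turning A && B into !A || !B.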
474
475// Initialize rules
476RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
477 MachineRegisterInfo &_MRI)
478 : ST(&_ST), MRI(&_MRI) {
479
480 addRulesForGOpcs({G_ADD, G_SUB}, Standard)
481 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
482 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
483 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
484 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
486 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
487 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
488 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});
489
490 addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
491 .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
492 .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});
493
494 addRulesForGOpcs({G_UADDE, G_USUBE}, Standard)
496 .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});
497
498 addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}, Standard)
499 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
500 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
501 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
502 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
504 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
505
506 bool HasVecMulU64 = ST->hasVectorMulU64();
507 addRulesForGOpcs({G_MUL}, Standard)
508 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
509 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
510 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
511 .Uni(S64, {{SgprB64}, {SgprB64, SgprB64}})
513 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
514 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
515 .Div(S64, {{VgprB64}, {VgprB64, VgprB64}}, HasVecMulU64)
516 .Div(S64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32Mul}, !HasVecMulU64);
517
518 bool hasMulHi = ST->hasScalarMulHiInsts();
519 addRulesForGOpcs({G_UMULH, G_SMULH}, Standard)
520 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
521 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasMulHi)
522 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasMulHi);
523
524 addRulesForGOpcs({G_AMDGPU_MAD_U64_U32}, Standard)
525 .Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}})
527
528 bool HasScalarSMulU64 = ST->hasScalarSMulU64();
529 addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, Standard)
530 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}, UniMul64}, HasScalarSMulU64)
531 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, DivSMulToMAD});
532
533 addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
535 .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
536 .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
537 .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
538 .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
539 .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
540 .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
541 .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
542
543 addRulesForGOpcs({G_SHL}, Standard)
544 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
545 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
547 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
548 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
549 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
550 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
551 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
552
553 addRulesForGOpcs({G_LSHR}, Standard)
554 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
555 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
557 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
558 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
559 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
560 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
561 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
562
563 addRulesForGOpcs({G_ASHR}, Standard)
564 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
565 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
567 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
568 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
569 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
570 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
571 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
572
573 addRulesForGOpcs({G_FSHR}, Standard)
574 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
575 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
576
577 addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});
578
579 addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
580 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
581 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
582 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
583 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});
584
585 addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
586 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
587 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
588 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
589 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
591 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
592
593 addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
594 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
595 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
596 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
597 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
599 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
600
601 // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
602 // and G_FREEZE here; the rest is trivially regbank-selected earlier.
603 addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
604 addRulesForGOpcs({G_CONSTANT})
605 .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
606 addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});
607
608 Predicate isSignedICmp([](const MachineInstr &MI) -> bool {
609 auto Pred =
610 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
611 return CmpInst::isSigned(Pred);
612 });
613
614 Predicate isEqualityICmp([](const MachineInstr &MI) -> bool {
615 auto Pred =
616 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
617 return ICmpInst::isEquality(Pred);
618 });
619
620 bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
621 // clang-format off
622 addRulesForGOpcs({G_ICMP})
623 .Any({{{UniS1, _, S16}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
624 .Any({{{UniS1, _, S16}, !isEqualityICmp && isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32SExt, Sgpr32SExt}}})
625 .Any({{{UniS1, _, S16}, !isEqualityICmp && !isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
626 .Any({{{DivS1, _, S16}}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
627 .Any({{{UniS1, _, S32}}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
628 .Any({{{DivS1, _, S32}}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
629 .Any({{{UniS1, _, S64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}}, HasScalarCompareEq64)
630 .Any({{{UniS1, _, S64}, isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}}, !HasScalarCompareEq64)
631 .Any({{{UniS1, _, S64}, !isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
632 .Any({{{DivS1, _, S64}}, {{Vcc}, {None, Vgpr64, Vgpr64}}})
633 .Any({{{UniS1, _, Ptr32}}, {{Sgpr32Trunc}, {None, SgprPtr32, SgprPtr32}}})
634 .Any({{{DivS1, _, Ptr32}}, {{Vcc}, {None, VgprPtr32, VgprPtr32}}})
635 .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, SgprPtr64, SgprPtr64}}}, HasScalarCompareEq64)
636 .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}}, !HasScalarCompareEq64)
637 .Any({{{UniS1, _, Ptr64}, !isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}})
638 .Any({{{DivS1, _, Ptr64}}, {{Vcc}, {None, VgprPtr64, VgprPtr64}}});
639 // clang-format on
640
641 addRulesForGOpcs({G_BRCOND})
642 .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
643 .Any({{DivS1}, {{}, {Vcc}}});
644
645 addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});
646
647 addRulesForGOpcs({G_SELECT}, StandardB)
648 .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
650 .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
654
655 addRulesForGOpcs({G_ANYEXT})
656 .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
657 .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
658 .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
659 .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
660 .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
661 .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
662 .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
663 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
664 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
665 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
666
667 bool Has16bitCmp = ST->has16BitInsts();
668
669 // In GlobalISel, an in-register G_TRUNC is treated as a no-op and is selected
670 // into a COPY. It is up to the user to deal with the truncated bits.
671 addRulesForGOpcs({G_TRUNC})
672 .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
673 .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
674 .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
675 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
676 .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
677 .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
678 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
679 .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
680 .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
681 // This is non-trivial. VgprToVccCopy is done using a compare instruction.
682 .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}}, Has16bitCmp)
684 !Has16bitCmp)
685 .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
686 .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});
687
688 addRulesForGOpcs({G_ZEXT})
692 .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
693 .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
694 .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
695 .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
696 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
697 // not extending S16 to S32 is questionable.
698 .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
699 .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
700 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
701 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
702
703 addRulesForGOpcs({G_SEXT})
707 .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
708 .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
709 .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
710 .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
711 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
712 // not extending S16 to S32 is questionable.
713 .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
714 .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
715 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
716 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
717
718 addRulesForGOpcs({G_SEXT_INREG})
719 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
720 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
721 .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
723
724 addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
725 .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
726 .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
727 .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
728 .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});
729
730 // Atomic read-modify-write operations: result and value are always VGPR,
731 // pointer varies by address space.
732 addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
733 G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR})
734 .Any({{S32, P0}, {{Vgpr32}, {VgprP0, Vgpr32}}})
735 .Any({{S64, P0}, {{Vgpr64}, {VgprP0, Vgpr64}}})
736 .Any({{S32, P1}, {{Vgpr32}, {VgprP1, Vgpr32}}})
737 .Any({{S64, P1}, {{Vgpr64}, {VgprP1, Vgpr64}}})
738 .Any({{S32, P3}, {{Vgpr32}, {VgprP3, Vgpr32}}})
739 .Any({{S64, P3}, {{Vgpr64}, {VgprP3, Vgpr64}}});
740
741 addRulesForGOpcs({G_ATOMIC_CMPXCHG})
742 .Any({{DivS32, P2}, {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
743 .Any({{DivS64, P2}, {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
744 .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3, Vgpr32, Vgpr32}}})
745 .Any({{DivS64, P3}, {{Vgpr64}, {VgprP3, Vgpr64, Vgpr64}}});
746
747 addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
748 .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0, VgprV2S32}}})
749 .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1, VgprV2S32}}})
750 .Any({{DivS64, P0}, {{Vgpr64}, {VgprP0, VgprV2S64}}})
751 .Any({{DivS64, P1}, {{Vgpr64}, {VgprP1, VgprV2S64}}});
752
753 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
754 bool hasSMRDSmall = ST->hasScalarSubwordLoads();
755 bool usesTrue16 = ST->useRealTrue16Insts();
756
757 Predicate isAlign16([](const MachineInstr &MI) -> bool {
758 return (*MI.memoperands_begin())->getAlign() >= Align(16);
759 });
760
761 Predicate isAlign4([](const MachineInstr &MI) -> bool {
762 return (*MI.memoperands_begin())->getAlign() >= Align(4);
763 });
764
765 Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
766 return (*MI.memoperands_begin())->isAtomic();
767 });
768
769 Predicate isUniMMO([](const MachineInstr &MI) -> bool {
770 return AMDGPU::isUniformMMO(*MI.memoperands_begin());
771 });
772
773 Predicate isConst([](const MachineInstr &MI) -> bool {
774 // The address space in the MMO can differ from the address space on the pointer.
775 const MachineMemOperand *MMO = *MI.memoperands_begin();
776 const unsigned AS = MMO->getAddrSpace();
777 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
778 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
779 });
780
781 Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
782 return (*MI.memoperands_begin())->isVolatile();
783 });
784
785 Predicate isInvMMO([](const MachineInstr &MI) -> bool {
786 return (*MI.memoperands_begin())->isInvariant();
787 });
788
789 Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
790 return (*MI.memoperands_begin())->getFlags() & MONoClobber;
791 });
792
793 Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
794 const MachineMemOperand *MMO = *MI.memoperands_begin();
795 return MMO->getAlign() >= Align(MMO->getSize().getValue());
796 });
797
798 Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
799 const MachineMemOperand *MMO = *MI.memoperands_begin();
800 const unsigned MemSize = 8 * MMO->getSize().getValue();
801 return MemSize == 16 || MemSize == 8;
802 });
803
804 Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
805 const MachineMemOperand *MMO = *MI.memoperands_begin();
806 return 8 * MMO->getSize().getValue() == 32;
807 });
808
809 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
810 (isConst || isInvMMO || isNoClobberMMO);
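 // Added note (not part of the upstream source): isUL approximates "safe to
 // select as a scalar (s_load) uniform load": the access is not atomic, its
 // MMO is known uniform, and it is either in constant address space or
 // non-volatile and known not to be clobbered (invariant or MONoClobber).
 // The G_LOAD rules below use it to choose between s_load and global/buffer
 // load mappings for uniform loads.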
811
812 // clang-format off
813 // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
814 addRulesForGOpcs({G_LOAD})
815 // flat, addrspace(0), never uniform - flat_load
816 .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
817 .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
818 .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
819 .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
820 .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})
821
822 // global, addrspace(1)
823 // divergent - global_load
824 .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
825 .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) //32-bit load, 8-bit and 16-bit any-extending load
826 .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
827 .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
828 .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
829 .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
830 .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})
831
832 // uniform - s_load
833 .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
834 .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
835 .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
836 // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
837 .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
838 .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) //32-bit load
839 .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
840 .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
841 .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
842 .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
843 .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
844 .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
845 .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
846
847 // Uniform load done via a global or buffer load, for example a volatile or
848 // non-aligned uniform load. Not using the standard {{UniInVgprTy}, {VgprP1}}
849 // since that is selected as global_load; use SgprP1 for the pointer instead
850 // to match patterns without flat-for-global, the default for GFX7 and older.
851 // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
852 // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
853 .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
854 .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
855 .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
856 .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
857 .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
858 .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
859 .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
860 .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
861 .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})
862
863 // local, addrspace(3) - ds_load
864 .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
865 .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
866 .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
867 .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
868 .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})
869
870 .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
871 .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
872 .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
873 .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
874 .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})
875
876 // constant, addrspace(4)
877 // divergent - global_load
878 .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
879 .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) //32-bit load, 8-bit and 16-bit any-extending load
880 .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
881 .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
882 .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
883 .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
884 .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})
885
886 // uniform - s_load
887 .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
888 .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
889 .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
890 .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
891 .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) //32-bit load
892 .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
893 .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
894 .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
895 .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
896 .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
897 .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
898 .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})
899
900 // uniform in vgpr - global_load or buffer_load
901 .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
902 .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
903 .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
904 .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
905 .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
906 .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
907 .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
908 .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
909 .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})
910
911 // private, addrspace(5), never uniform - scratch_load
912 .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
913 .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
914 .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
915 .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
916 .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})
917
918 .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});
919
920
921 addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
922 .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})
923
924 .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
925 .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
926 .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
927 .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
928 .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)
929
930 .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
931 .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})
932
933 .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
934 .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
935 .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
936 .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
937 .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)
938
939 .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});
940
941 addRulesForGOpcs({G_STORE})
942 // addrspace(0)
943 .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
944 .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
945 .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
946 .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
947 .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})
948
949 // addrspace(1), there are no stores to addrspace(4)
950 // For targets:
951 // - with "+flat-for-global" - global_store
952 // - without(-flat-for-global) - buffer_store addr64
953 .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
954 .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
955 .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
956 .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
957 .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})
958
959 // For UniP1, use sgpr ptr to match flat-for-global patterns. Targets:
960 // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
961 // - without(-flat-for-global) - need sgpr ptr to select buffer_store
962 .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
963 .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
964 .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
965 .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
966 .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})
967
968 // addrspace(3) and addrspace(5)
969 .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
970 .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
971 .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
972 .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
973 .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
974 // clang-format on
975
976 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
977 G_AMDGPU_TBUFFER_LOAD_FORMAT},
978 StandardB)
987
988 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
989 G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
990 StandardB)
993
994 addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_FORMAT,
995 G_AMDGPU_BUFFER_STORE_FORMAT_D16,
996 G_AMDGPU_TBUFFER_STORE_FORMAT})
1000 .Any(
1002 .Any(
1004 .Any({{V4S32},
1006
1007 addRulesForGOpcs({G_PTR_ADD})
1008 .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
1009 .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
1010 .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
1011 .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});
1012
1013 addRulesForGOpcs({G_INTTOPTR})
1014 .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
1015 .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
1016 .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
1017 .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
1018 .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
1019 .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});
1020
1021 addRulesForGOpcs({G_PTRTOINT})
1022 .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
1023 .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
1024 .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
1025 .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
1026 .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
1027 .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});
1028
1029 // FIXME: Update llvm/test/CodeGen/AMDGPU/ptrmask.ll to use GlobalISel.
1030 // Currently crashes on P8 (buffer resource) tests due to legalizer issue.
1031 addRulesForGOpcs({G_PTRMASK})
1032 .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
1033 .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
1034 .Any({{UniP3}, {{SgprP3}, {SgprP3, Sgpr32}}})
1035 .Any({{DivP3}, {{VgprP3}, {VgprP3, Vgpr32}}});
1036
1037 addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
1038
1039 addRulesForGOpcs({G_BITREVERSE}, Standard)
1040 .Uni(S32, {{Sgpr32}, {Sgpr32}})
1041 .Div(S32, {{Vgpr32}, {Vgpr32}})
1042 .Uni(S64, {{Sgpr64}, {Sgpr64}})
1043 .Div(S64, {{Vgpr64}, {Vgpr64}});
1044
1045 addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});
1046
1047 addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
1048 .Uni(S64, {{Sgpr64}, {}});
1049
1050 addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});
1051
1052 addRulesForGOpcs({G_GLOBAL_VALUE})
1053 .Any({{UniP0}, {{SgprP0}, {}}})
1054 .Any({{UniP1}, {{SgprP1}, {}}})
1055 .Any({{UniP3}, {{SgprP3}, {}}})
1056 .Any({{UniP4}, {{SgprP4}, {}}})
1057 .Any({{UniP8}, {{SgprP8}, {}}});
1058
1059 addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});
1060
1061 bool hasSALUFloat = ST->hasSALUFloatInsts();
1062
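 // Added note (not part of the upstream source): when the target has SALU
 // float instructions, uniform f16/f32 operations below keep Sgpr mappings;
 // otherwise they use UniInVgpr* mappings, i.e. the operation is performed in
 // VGPRs and the uniform result is read back into an SGPR by the
 // RegBankLegalize helper.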
1063 addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
1064 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
1065 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
1066 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1067 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
1068 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
1069 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1070 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1071 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
1072 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
1074 hasSALUFloat)
1075 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
1076
1077 addRulesForGOpcs({G_FSUB, G_STRICT_FSUB}, Standard)
1078 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1079 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1080 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
1081 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
1082 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
1083 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);
1084
1085 addRulesForGOpcs({G_FMAD}, Standard)
1086 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
1087 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1088 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1089 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1090
1091 addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP}, Standard)
1092 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1093 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1094 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
1095 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1096 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr32}})
1097 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
1098
1099 addRulesForGOpcs({G_FMA}, Standard)
1100 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1101 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
1102 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
1103 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
1107 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, hasSALUFloat)
1108 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, !hasSALUFloat)
1109 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, hasSALUFloat)
1110 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, !hasSALUFloat)
1111 .Uni(V2S16,
1113 hasSALUFloat)
1115 !hasSALUFloat);
1116
1117 addRulesForGOpcs({G_AMDGPU_FMED3}, Standard)
1118 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
1119 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1120 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1121 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1122
1123 // TODO: This opcode is generated from the i64->i16 signed clamped pattern in
1124 // the PreLegalizerCombiner. Move the combine to RegBankCombiner to keep more
1125 // instructions on SALU.
1126 addRulesForGOpcs({G_AMDGPU_SMED3}, Standard)
1127 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1128 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1129
1130 // FNEG and FABS are either folded as source modifiers or selected as bitwise
1131 // XOR and AND with a mask. XOR and AND are available on the SALU, but for
1132 // targets without SALU float we still select them as VGPR since there would
1133 // be no real sgpr use.
1134 addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
1135 .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
1136 .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
1137 .Div(S16, {{Vgpr16}, {Vgpr16}})
1138 .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
1139 .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
1140 .Div(S32, {{Vgpr32}, {Vgpr32}})
1141 .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
1142 .Div(S64, {{Vgpr64}, {Vgpr64}})
1143 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
1144 .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
1145 .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
1146 .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
1147 .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});
1148
1149 addRulesForGOpcs({G_FCANONICALIZE}, Standard)
1150 .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
1151 .Div(S32, {{Vgpr32}, {Vgpr32}})
1152 .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
1153 .Div(S16, {{Vgpr16}, {Vgpr16}})
1154 .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
1155 .Div(S64, {{Vgpr64}, {Vgpr64}})
1156 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
1157 .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
1158 .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
1159 .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});
1160
1161 addRulesForGOpcs({G_FPTOUI})
1162 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
1163 .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
1164
1165 addRulesForGOpcs({G_UITOFP})
1166 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
1167 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
1168 .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
1169
1170 addRulesForGOpcs({G_FPEXT})
1171 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
1172 .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
1173 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}})
1174 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
1175 .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat);
1176
1177 addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32}, Standard)
1178 .Uni(V2S16, {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
1179 .Div(V2S16, {{VgprV2S16}, {Vgpr32, Vgpr32}});
1180
1181 addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY}, Standard)
1182 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1183 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
1184
1185 addRulesForGOpcs({G_FPTRUNC})
1186 .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
1187 .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
1188 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
1190 .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
1191 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
1192 .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);
1193
1194 addRulesForGOpcs({G_IS_FPCLASS})
1195 .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
1196 .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
1197 .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
1198 .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
1199 .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
1200 .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
1201
1202 addRulesForGOpcs({G_FCMP}, Standard)
1203 .Any({{UniS1, _, S16}, {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}},
1204 hasSALUFloat)
1205 .Any({{UniS1, _, S16}, {{UniInVcc}, {None, Vgpr16, Vgpr16}}},
1206 !hasSALUFloat)
1207 .Any({{DivS1, _, S16}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
1208 .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}},
1209 hasSALUFloat)
1210 .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}},
1211 !hasSALUFloat)
1212 .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
1213 .Any({{UniS1, _, S64}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
1214 .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});
1215
1216 using namespace Intrinsic;
1217
1218 addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
1219
1220 addRulesForIOpcs({amdgcn_groupstaticsize}).Any({{S32}, {{Sgpr32}, {IntrId}}});
1221
1222 // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
1223 addRulesForIOpcs({amdgcn_end_cf})
1224 .Any({{_, UniS32}, {{}, {IntrId, Sgpr32}}})
1225 .Any({{_, UniS64}, {{}, {IntrId, Sgpr64}}});
1226
1227 addRulesForIOpcs({amdgcn_if_break}, Standard)
1228 .Uni(S64, {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
1229 .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
1230
1231 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
1232 .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
1233
1234 addRulesForIOpcs({amdgcn_readfirstlane})
1235 .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
1236 // This should not exist in the first place; it comes from call lowering,
1237 // which inserts a readfirstlane just in case the register is not in an sgpr.
1238 .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
1239
1240 addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24}, Standard)
1241 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1242 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
1243 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
1244 .Div(S64, {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});
1245
1246 addRulesForIOpcs({amdgcn_mulhi_u24, amdgcn_mulhi_i24, amdgcn_fmul_legacy},
1247 Standard)
1248 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1249 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
1250
1251 addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract}, Standard)
1252 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
1253 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1254 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
1255 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1256 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
1257 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});
1258
1259 addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe}, Standard)
1260 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1261 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
1262 .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64, Sgpr32, Sgpr32}, S_BFE})
1263 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32, Vgpr32}, V_BFE});
1264
1265 addRulesForIOpcs({amdgcn_global_load_tr_b64})
1266 .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
1267 .Any({{DivB32}, {{VgprB32}, {IntrId, SgprP1}}});
1268
1269 addRulesForIOpcs({amdgcn_global_load_tr_b128})
1270 .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
1271 .Any({{DivB128}, {{VgprB128}, {IntrId, SgprP1}}});
1272
1273} // end initialize rules