//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize Rules for all opcodes.
/// Implementation of container for all the Rules and search.
/// Fast search for most common case when Rule.Predicate checks LLT and
/// uniformity of register in operand 0.
//
//===----------------------------------------------------------------------===//
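//
// Example (sketch, not part of the pass): a rule set registered below, e.g.
//   addRulesForGOpcs({G_ADD, G_SUB}, Standard)
//       .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
//       .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
// maps a uniform 32-bit add/sub to an SGPR result with SGPR sources and a
// divergent one to VGPRs; findMappingForMI then returns the RegBankLLTMapping
// whose predicate matches a given MachineInstr.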

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
                           const MachineUniformityInfo &MUI,
                           const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case P8:
    return MRI.getType(Reg) == LLT::pointer(8, 128);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniP8:
    return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
  case DivV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    if (OpUniformityAndTypes[i] == _) {
      if (MI.getOperand(i).isReg())
        return false;
      continue;
    }

    // Remaining IDs check registers.
    if (!MI.getOperand(i).isReg())
      return false;

    if (!matchUniformityAndLLT(MI.getOperand(i).getReg(),
                               OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}
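
// Example (sketch, not part of the pass): the G_ICMP rule registered below
// uses the op list {UniS1, _, S32}. It matches when operand 0 is a uniform
// s1 register, operand 1 is a non-register operand (the compare predicate)
// and operand 2 is a 32-bit scalar register; trailing operands are not
// inspected by PredicateMapping::match.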

SetOfRulesForOpcode::SetOfRulesForOpcode() {}

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

UniformityLLTOpPredicateID LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}

const RegBankLLTMapping *
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "Fast Rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added in each
  // slot that could "match fast Predicate". If not, an InvalidMapping is
  // returned, which results in failure; the "Slow Rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? &Uni[Slot] : &Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return &Rule.OperandMapping;
  }

  return nullptr;
}
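
// Example (sketch, not part of the pass): for an opcode registered with the
// Standard fast-rules set, a divergent s32 result maps through LLTToId(S32)
// to fast slot 0, so Div[0] is returned directly and the slow Rules list is
// never scanned.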

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = RuleApplyIDs;
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = RuleApplyIDs;
}

int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode *
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end())
      return nullptr;
    return &IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end())
    return nullptr;
  return &GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
class Predicate {
private:
  struct Elt {
    // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
    // Sequences of && and || will be represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true jump to B
    //   A == false jump to end or Y, result is A(false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true jump to end or Y, result is A(true) or Y
    //   A == false jump to B
    // Notice that when negating expression, we simply flip Neg on each Pred
    // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  };

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while ((Idx != ResultIdx));

    return Result;
  };

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  };

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};
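
// Example (sketch, not part of the pass): encoding of (A && B) || C with the
// operators above. Expression ends up as three Elts:
//   Idx 0: {A, Neg=0, TJump=1, FJump=2} // A true -> test B, A false -> test C
//   Idx 1: {B, Neg=0, TJump=2, FJump=1} // B true -> end (true), else test C
//   Idx 2: {C, Neg=0, TJump=1, FJump=1} // last evaluated Pred is the result
// operator() walks the jumps until Idx reaches Expression.size() and returns
// the last computed Result.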

// Initialize rules
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});

  addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_UADDE, G_USUBE}, Standard)
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});

  addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  bool hasMulHi = ST->hasScalarMulHiInsts();
  addRulesForGOpcs({G_UMULH, G_SMULH}, Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasMulHi)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasMulHi);

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FSHR}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
  // and G_FREEZE here; the rest is trivially regbankselected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
  addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});

  addRulesForGOpcs({G_ICMP})
      .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});

  addRulesForGOpcs({G_FCMP})
      .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  // In GlobalISel an in-register G_TRUNC is treated as a no-op and is
  // instruction-selected into a COPY. It is up to the user to deal with the
  // truncated bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}})
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})

  addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});

  bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();
  bool usesTrue16 = ST->useRealTrue16Insts();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO may be different than the address space on
    // the pointer.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return MMO->getAlign() >= Align(MMO->getSize().getValue());
  });

  Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return MemSize == 16 || MemSize == 8;
  });

  Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return 8 * MMO->getSize().getValue() == 32;
  });

  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);
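  // Reading of the composed predicate above (sketch): isUL holds for a load
  // whose MMO is not atomic, is uniform, and is either in a constant address
  // space or (non-volatile and (invariant or marked MONoClobber)) - i.e. a
  // uniform load that is safe to select as a scalar (s_load) access.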

  // clang-format off
  // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
  addRulesForGOpcs({G_LOAD})
      // flat, addrspace(0), never uniform - flat_load
      .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
      .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
      .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
      .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})

      // global, addrspace(1)
      // divergent - global_load
      .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
      .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
      .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
      .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
      .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
      .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
      .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
      .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
      .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})

      // Uniform via global or buffer load, for example a volatile or
      // non-aligned uniform load. Not using the standard
      // {{UniInVgprTy}, {VgprP1}} mapping since that is selected as
      // global_load; use SgprP1 for the pointer instead to match patterns
      // without flat-for-global, the default for GFX7 and older.
      // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
      // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
      .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
      .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
      .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})

      // local, addrspace(3) - ds_load
      .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
      .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
      .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
      .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})

      .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
      .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
      .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
      .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})

      // constant, addrspace(4)
      // divergent - global_load
      .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
      .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
      .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
      .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
      .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
      .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
      .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})

      // uniform in vgpr - global_load or buffer_load
      .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
      .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
      .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})

      // private, addrspace(5), never uniform - scratch_load
      .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
      .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
      .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})

      .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});


  addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})

      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
      .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
      .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)

      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
      .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})

      .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
      .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
      .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)

      .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});

  addRulesForGOpcs({G_STORE})
      // addrspace(0)
      .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
      .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
      .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
      .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})

      // addrspace(1), there are no stores to addrspace(4)
      // For targets:
      // - with "+flat-for-global" - global_store
      // - without (-flat-for-global) - buffer_store addr64
      .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
      .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
      .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})

      // For UniP1, use sgpr ptr to match flat-for-global patterns. Targets:
      // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
      // - without (-flat-for-global) - need sgpr ptr to select buffer_store
      .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
      .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
      .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})

      // addrspace(3) and addrspace(5)
      .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
      .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
      .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
      .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
  // clang-format on

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
                    G_AMDGPU_TBUFFER_LOAD_FORMAT},
                   StandardB)

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
                    G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
                   StandardB)

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
      .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  addRulesForGOpcs({G_BITREVERSE}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}});

  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});

  addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
      .Uni(S64, {{Sgpr64}, {}});

  addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});

  addRulesForGOpcs({G_GLOBAL_VALUE})
      .Any({{UniP0}, {{SgprP0}, {}}})
      .Any({{UniP1}, {{SgprP1}, {}}})
      .Any({{UniP3}, {{SgprP3}, {}}})
      .Any({{UniP4}, {{SgprP4}, {}}})
      .Any({{UniP8}, {{SgprP8}, {}}});

  addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
                  hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_FSUB, G_STRICT_FSUB}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);

  addRulesForGOpcs({G_FMAD}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FMA}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Uni(V2S16,
           hasSALUFloat)
           !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_FMED3}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // FNEG and FABS are either folded as source modifiers or can be selected as
  // bitwise XOR and AND with a mask. XOR and AND are available on the SALU,
  // but for targets without SALU float we still select them on VGPRs since
  // there would be no real SGPR use.
  addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  addRulesForGOpcs({G_FPTOUI})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  addRulesForGOpcs({G_UITOFP})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  addRulesForGOpcs({G_FPEXT})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat);

  addRulesForGOpcs({G_FPTRUNC})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);

  addRulesForGOpcs({G_IS_FPCLASS})
      .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
      .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
      .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
      .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
      .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
      .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});

  using namespace Intrinsic;

  addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});

  // This is the "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
  addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});

  addRulesForIOpcs({amdgcn_if_break}, Standard)
      .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});

  addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
      .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_readfirstlane})
      .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
      // This should not exist in the first place; it comes from call lowering,
      // readfirstlane-ing just in case the register is not in an SGPR.
      .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});

  addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24}, Standard)
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_mulhi_u24, amdgcn_mulhi_i24, amdgcn_fmul_legacy},
                   Standard)
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});

} // end initialize rules