//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize rules for all opcodes.
/// Implementation of the container for all the rules and of the rule search.
/// The fast search covers the most common case, where Rule.Predicate only
/// checks the LLT and uniformity of the register in operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

bool AMDGPU::matchUniformityAndLLT(Register Reg,
                                   UniformityLLTOpPredicateID UniID,
                                   const MachineUniformityInfo &MUI,
                                   const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case P8:
    return MRI.getType(Reg) == LLT::pointer(8, 128);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniP8:
    return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    if (OpUniformityAndTypes[i] == _) {
      if (MI.getOperand(i).isReg())
        return false;
      continue;
    }

    // Remaining IDs check registers.
    if (!MI.getOperand(i).isReg())
      return false;

    if (!matchUniformityAndLLT(MI.getOperand(i).getReg(),
                               OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}
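
// For example, a PredicateMapping with the operand list {DivS32, S32}, as used
// for G_UITOFP below, matches an instruction whose operand 0 (the result) is a
// divergent 32-bit scalar and whose operand 1 (the source) is a 32-bit scalar
// of either uniformity.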

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

UniformityLLTOpPredicateID AMDGPU::LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

UniformityLLTOpPredicateID AMDGPU::LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}

const RegBankLLTMapping &
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "fast rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added to each
  // slot that could match the fast predicate. If not, an invalid mapping is
  // returned, which results in failure; the "slow rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? Uni[Slot] : Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return Rule.OperandMapping;
  }

  LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
  llvm_unreachable("None of the rules defined for MI's opcode matched MI");
}
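
// Illustrative example: for a divergent G_ADD with a 32-bit scalar result, the
// G_ADD rule set below uses the Standard fast types, so LLTToId gives S32,
// getFastPredicateSlot maps that to slot 0, and Div[0] is returned without
// scanning the slow Rules list at all.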

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = RuleApplyIDs;
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = RuleApplyIDs;
}

int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode &
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end()) {
      LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
      llvm_unreachable("No rules defined for intrinsic opcode");
    }
    return IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end()) {
    LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
    llvm_unreachable("No rules defined for generic opcode");
  }
  return GRules.at(GRAIt->second);
}
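
// Illustrative note: opcodes registered together (for example {G_ADD, G_SUB}
// below) share a single rule set; the alias map resolves each such opcode to
// the common key used to look up that shared SetOfRulesForOpcode here.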

// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
class Predicate {
private:
  struct Elt {
    // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
    // Sequences of && and || will be represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true jump to B
    //   A == false jump to end or Y, result is A(false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true jump to end or Y, result is A(true) or Y
    //   A == false jump to B
    // Notice that when negating expression, we simply flip Neg on each Pred
    // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  };

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while ((Idx != ResultIdx));

    return Result;
  };

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  };

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};

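// Illustrative sketch of how the combinators compose (see the isUL predicate
// in the load rules below for a real use): the overloaded operators build one
// jump-table expression that is evaluated with short-circuiting per
// MachineInstr, for example:
//   Predicate isA(...), isB(...), isC(...);
//   auto P = (isA && !isB) || isC;
//   // P(MI): if isA(MI) holds, evaluation jumps to !isB; if that also holds,
//   // the whole expression is true without ever evaluating isC.
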
468// Initialize rules
471 : ST(&_ST), MRI(&_MRI) {

  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});

  addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_UADDE, G_USUBE}, Standard)
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});

  addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
  // and G_FREEZE here; the rest is trivially regbank-selected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
  addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});

  addRulesForGOpcs({G_ICMP})
      .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});

  addRulesForGOpcs({G_FCMP})
      .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  // In GlobalISel, an in-register G_TRUNC is treated as a no-op and is
  // instruction-selected into a COPY. It is up to the user to deal with the
  // truncated bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}})
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})

  addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});

  bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();
  bool usesTrue16 = ST->useRealTrue16Insts();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO may be different from the address space on
    // the pointer.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return MMO->getAlign() >= Align(MMO->getSize().getValue());
  });

  Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return MemSize == 16 || MemSize == 8;
  });

  Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return 8 * MMO->getSize().getValue() == 32;
  });

  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);

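  // For example, a non-atomic, uniform, invariant and non-volatile MMO
  // satisfies every clause of isUL above, so the corresponding uniform load is
  // eligible for the scalar-load (s_load) rules below.
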
  // clang-format off
  // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
  addRulesForGOpcs({G_LOAD})
      // flat, addrspace(0), never uniform - flat_load
      .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
      .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
      .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
      .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})

      // global, addrspace(1)
      // divergent - global_load
      .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
      .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
      .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
      .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
      .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
      .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
      .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
      .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
      .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})

      // Uniform loads done via a global or buffer load, for example volatile
      // or under-aligned uniform loads. Not using the standard
      // {{UniInVgprTy}, {VgprP1}} mapping, since that is selected as
      // global_load; use SgprP1 for the pointer instead to match patterns
      // without flat-for-global, the default for GFX7 and older.
      // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
      // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
      .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
      .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
      .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})

      // local, addrspace(3) - ds_load
      .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
      .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
      .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
      .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})

      .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
      .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
      .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
      .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})

      // constant, addrspace(4)
      // divergent - global_load
      .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
      .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
      .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
      .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
      .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
      .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
      .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})

      // uniform in vgpr - global_load or buffer_load
      .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
      .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
      .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})

      // private, addrspace(5), never uniform - scratch_load
      .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
      .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
      .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})

      .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});

  addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})

      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
      .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
      .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)

      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
      .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})

      .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
      .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
      .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)

      .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});

  addRulesForGOpcs({G_STORE})
      // addrspace(0)
      .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
      .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
      .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
      .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})

      // addrspace(1), there are no stores to addrspace(4)
      // For targets:
      // - with "+flat-for-global" - global_store
      // - without (-flat-for-global) - buffer_store addr64
      .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
      .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
      .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})

      // For UniP1, use sgpr ptr to match flat-for-global patterns. Targets:
      // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
      // - without (-flat-for-global) - need sgpr ptr to select buffer_store
      .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
      .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
      .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})

      // addrspace(3) and addrspace(5)
      .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
      .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
      .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
      .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
  // clang-format on

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
                    G_AMDGPU_TBUFFER_LOAD_FORMAT},
                   StandardB)

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
      .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});

  addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
      .Uni(S64, {{Sgpr64}, {}});

  addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});

  addRulesForGOpcs({G_GLOBAL_VALUE})
      .Any({{UniP0}, {{SgprP0}, {}}})
      .Any({{UniP1}, {{SgprP1}, {}}})
      .Any({{UniP3}, {{SgprP3}, {}}})
      .Any({{UniP4}, {{SgprP4}, {}}})
      .Any({{UniP8}, {{SgprP8}, {}}});

  addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}},
           hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})

  addRulesForGOpcs({G_FPTOUI})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  addRulesForGOpcs({G_UITOFP})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  using namespace Intrinsic;

  addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});

  // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
  addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});

  addRulesForIOpcs({amdgcn_if_break}, Standard)
      .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});

  addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
      .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_readfirstlane})
      .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
      // This should not exist in the first place; it comes from call lowering,
      // readfirstlane-ing just in case the register is not in an SGPR.
      .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});

} // end initialize rules