//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize Rules for all opcodes.
/// Implements the container that holds all the Rules and the rule search,
/// with a fast search for the most common case: a Rule whose Predicate checks
/// the LLT and uniformity of the register in operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

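// Returns true if Ty is a pointer of the given bit width, regardless of
// address space (e.g. both p3 and p5 match Width == 32).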
bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

bool AMDGPU::matchUniformityAndLLT(Register Reg,
                                   UniformityLLTOpPredicateID UniID,
                                   const MachineUniformityInfo &MUI,
                                   const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    if (OpUniformityAndTypes[i] == _) {
      if (MI.getOperand(i).isReg())
        return false;
      continue;
    }

    // Remaining IDs check registers.
    if (!MI.getOperand(i).isReg())
      return false;

    if (!matchUniformityAndLLT(MI.getOperand(i).getReg(),
                               OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}

SetOfRulesForOpcode::SetOfRulesForOpcode() {}

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

UniformityLLTOpPredicateID LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}

const RegBankLLTMapping &
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "Fast Rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added to
  // every slot that could match the fast Predicate. Otherwise InvalidMapping
  // is returned, which results in failure; the "Slow Rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? Uni[Slot] : Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return Rule.OperandMapping;
  }

  LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
  llvm_unreachable("None of the rules defined for MI's opcode matched MI");
}

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = RuleApplyIDs;
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = RuleApplyIDs;
}

int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode &
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end()) {
      LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
      llvm_unreachable("No rules defined for intrinsic opcode");
    }
    return IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end()) {
    LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
    llvm_unreachable("No rules defined for generic opcode");
  }
  return GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
class Predicate {
private:
  struct Elt {
    // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
    // Sequences of && and || will be represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true  jump to B
    //   A == false jump to end or Y, result is A(false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true  jump to end or Y, result is A(true) or Y
    //   A == false jump to B
    // Notice that when negating an expression, we simply flip Neg on each
    // Pred and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  };

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while ((Idx != ResultIdx));

    return Result;
  };

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  };

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};
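
// Illustration (added note, not part of the upstream logic above): composing
// two predicates as A && B produces the jump table
//   {A, Neg=0, TJump=1, FJump=2}, {B, Neg=0, TJump=1, FJump=1}
// with ResultIdx == 2. If A(MI) is true we fall through to B and return B's
// result; if A(MI) is false we jump straight past the end and return false,
// which is exactly short-circuit evaluation. operator|| is the mirror image
// (the true-jump is extended instead of the false-jump), and operator! simply
// flips Neg and swaps the two jump offsets of every element.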

// Initialize rules.
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
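  // Reading the rules above (added note): the first brace list is the mapping
  // for the definition, the second is for the uses. For example
  // .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}) handles a uniform
  // 16-bit add/sub by any-extending both sources into 32-bit SGPRs, doing the
  // operation in 32 bits, and truncating the result back to 16 bits.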

  addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
  // and G_FREEZE here; the rest is trivially regbankselected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
  addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});

  addRulesForGOpcs({G_ICMP})
      .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});

  addRulesForGOpcs({G_FCMP})
      .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  // In GlobalISel, in-register G_TRUNC is treated as a no-op and is
  // instruction-selected into a COPY. It is up to the user to deal with the
  // truncated bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}})
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})

  addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});

  bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();
  bool usesTrue16 = ST->useRealTrue16Insts();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO can be different than the address space on
    // the pointer.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return MMO->getAlign() >= Align(MMO->getSize().getValue());
  });

  Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return MemSize == 16 || MemSize == 8;
  });

  Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return 8 * MMO->getSize().getValue() == 32;
  });

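  // Added note: isUL ("uniform load") gates the scalar (s_load) path. A load
  // qualifies only if it is non-atomic, has a uniform MMO, and is known safe
  // to execute on the SALU: constant address space, or a non-volatile MMO that
  // is invariant or marked no-clobber.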
  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);

  // clang-format off
  // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
  addRulesForGOpcs({G_LOAD})
      // flat, addrspace(0), never uniform - flat_load
      .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
      .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
      .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
      .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})

      // global, addrspace(1)
      // divergent - global_load
      .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
      .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
      .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
      .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
      .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
      .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
      .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
      .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
      .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})

      // Uniform loads done via a global or buffer load, for example a volatile
      // or non-aligned uniform load. Not using the standard
      // {{UniInVgprTy}, {VgprP1}} mapping, since that is selected as
      // global_load; use an SgprP1 pointer instead to match patterns without
      // flat-for-global (the default for GFX7 and older):
      // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
      // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
      .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
      .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
      .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})

      // local, addrspace(3) - ds_load
      .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
      .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
      .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
      .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})

      .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
      .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
      .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
      .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})

      // constant, addrspace(4)
      // divergent - global_load
      .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
      .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
      .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
      .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
      .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
      .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
      .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})

      // uniform in vgpr - global_load or buffer_load
      .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
      .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
      .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})

      // private, addrspace(5), never uniform - scratch_load
      .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
      .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
      .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})

      .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});

  addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})

      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
      .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
      .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)

      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
      .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})

      .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
      .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
      .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)

      .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});

  addRulesForGOpcs({G_STORE})
      // addrspace(0)
      .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
      .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
      .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
      .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})

      // addrspace(1), there are no stores to addrspace(4)
      // For targets:
      // - with "+flat-for-global" - global_store
      // - without (-flat-for-global) - buffer_store addr64
      .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
      .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
      .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})

      // For UniP1, use sgpr ptr to match flat-for-global patterns. Targets:
      // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
      // - without (-flat-for-global) - need sgpr ptr to select buffer_store
      .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
      .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
      .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})

      // addrspace(3) and addrspace(5)
      .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
      .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
      .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
      .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
  // clang-format on

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
                    G_AMDGPU_TBUFFER_LOAD_FORMAT},
                   StandardB)

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
      .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  addRulesForGOpcs({G_READSTEADYCOUNTER}, Standard).Uni(S64, {{Sgpr64}, {}});

  bool hasSALUFloat = ST->hasSALUFloatInsts();

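  // Added note: on subtargets with SALU float instructions, the uniform cases
  // below stay on the SALU (Sgpr32 operands); otherwise the operation is done
  // on the VALU and the uniform result is brought back into an SGPR
  // (UniInVgprS32).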
  addRulesForGOpcs({G_FADD}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FPTOUI})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  addRulesForGOpcs({G_UITOFP})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  using namespace Intrinsic;

  addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});

  // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
  addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});

  addRulesForIOpcs({amdgcn_if_break}, Standard)
      .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});

  addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
      .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_readfirstlane})
      .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
      // This should not exist in the first place; it comes from call lowering,
      // which readfirstlanes the register just in case it is not in an SGPR.
      .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});

} // end initialize rules