//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize Rules for all opcodes.
/// Implementation of container for all the Rules and search.
/// Fast search for most common case when Rule.Predicate checks LLT and
/// uniformity of register in operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}
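
// For example, isAnyPtr(LLT::pointer(3, 32), 32) and
// isAnyPtr(LLT::pointer(5, 32), 32) both hold, so the Ptr32 predicates below
// match 32-bit pointers in any address space.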

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

bool AMDGPU::matchUniformityAndLLT(Register Reg,
                                   UniformityLLTOpPredicateID UniID,
                                   const MachineUniformityInfo &MUI,
                                   const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    if (OpUniformityAndTypes[i] == _) {
      if (MI.getOperand(i).isReg())
        return false;
      continue;
    }

    // Remaining IDs check registers.
    if (!MI.getOperand(i).isReg())
      return false;

    if (!matchUniformityAndLLT(MI.getOperand(i).getReg(),
                               OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}
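
// Example: the PredicateMapping {{UniS1, _, S32}} used for G_ICMP below
// matches an instruction whose operand 0 is a uniform 1-bit scalar, whose
// operand 1 is not a register (the compare predicate), and whose operand 2 is
// any 32-bit scalar register; operands past the end of the list are not
// checked.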

SetOfRulesForOpcode::SetOfRulesForOpcode() {}

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

UniformityLLTOpPredicateID AMDGPU::LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

UniformityLLTOpPredicateID AMDGPU::LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}

const RegBankLLTMapping &
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "Fast Rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added in each
  // slot that could "match fast Predicate". If not, InvalidMapping is
  // returned, which results in failure; the "Slow Rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? Uni[Slot] : Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return Rule.OperandMapping;
  }

  LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
  llvm_unreachable("None of the rules defined for MI's opcode matched MI");
}
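
// Example: a uniform G_ADD with a 32-bit result, registered below with the
// Standard fast-rules set, resolves here as follows: LLTToId returns S32,
// getFastPredicateSlot maps S32 to slot 0, and Uni[0] is returned without
// walking the slow Rules list.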
271
273 Rules.push_back(Rule);
274}
275
void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = RuleApplyIDs;
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = RuleApplyIDs;
}

int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode &
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end()) {
      LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
      llvm_unreachable("No rules defined for intrinsic opcode");
    }
    return IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end()) {
    LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
    llvm_unreachable("No rules defined for generic opcode");
  }
  return GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
class Predicate {
private:
  struct Elt {
    // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
    // Sequences of && and || will be represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    // A == true jump to B
    // A == false jump to end or Y, result is A(false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    // A == true jump to end or Y, result is A(true) or Y
    // A == false jump to B
    // Notice that when negating expression, we simply flip Neg on each Pred
    // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  };

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while ((Idx != ResultIdx));

    return Result;
  };

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  };

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};
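
// Illustration of the encoding above: for leaf predicates A and B, (A && B)
// is stored as two Elts; building it bumps A's FJumpOffset past B, so a false
// A jumps straight to the end (overall result false) while a true A falls
// through and the result is B. Negating with operator! flips Neg on every
// leaf and swaps the jump offsets, which turns that table into the one for
// (!A || !B). The composed predicates built this way (e.g. isUL below) are
// evaluated per MachineInstr by operator().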

// Initialize rules
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});
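
  // Reading these rules: each Uni/Div entry keys on the LLT and uniformity of
  // operand 0; the first brace list gives the RegBankLLTMappingApplyID for
  // every def and the second one for every use. For example
  // .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}) performs a uniform
  // 16-bit add/sub on 32-bit sgprs: both sources are any-extended to 32 bits
  // and the 32-bit result is truncated back to 16 bits.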

  addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_UADDE, G_USUBE}, Standard)
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});

  addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
  // and G_FREEZE here; the rest is trivially regbankselected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
  addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});

  addRulesForGOpcs({G_ICMP})
      .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});

  addRulesForGOpcs({G_FCMP})
      .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  // In global-isel, G_TRUNC in-reg is treated as a no-op and inst-selected
  // into a COPY. It is up to the user to deal with the truncated bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}})
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})

  addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});

  bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();
  bool usesTrue16 = ST->useRealTrue16Insts();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO may be different than the address space on
    // the pointer.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return MMO->getAlign() >= Align(MMO->getSize().getValue());
  });

  Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return MemSize == 16 || MemSize == 8;
  });

  Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return 8 * MMO->getSize().getValue() == 32;
  });

  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);
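
  // In words: isUL (uniform load) holds when the access is not atomic, its
  // MMO is uniform, it is either constant-address or non-volatile, and it is
  // either constant-address, invariant, or marked MONoClobber. These are the
  // uniform loads that the rules below are allowed to map to scalar s_load.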

  // clang-format off
  // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
  addRulesForGOpcs({G_LOAD})
      // flat, addrspace(0), never uniform - flat_load
      .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
      .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
      .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
      .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})

      // global, addrspace(1)
      // divergent - global_load
      .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
      .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
      .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
      .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
      .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
      .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
      .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
      .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
      .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})

      // Uniform via global or buffer load, for example a volatile or
      // non-aligned uniform load. Not using the standard
      // {{UniInVgprTy}, {VgprP1}} since it is selected as global_load; use
      // SgprP1 for the pointer instead to match patterns without
      // flat-for-global, the default for GFX7 and older.
      // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
      // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
      .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
      .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
      .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})

      // local, addrspace(3) - ds_load
      .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
      .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
      .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
      .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})

      .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
      .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
      .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
      .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})

      // constant, addrspace(4)
      // divergent - global_load
      .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
      .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
      .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
      .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
      .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
      .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
      .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})

      // uniform in vgpr - global_load or buffer_load
      .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
      .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
      .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})

      // private, addrspace(5), never uniform - scratch_load
      .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
      .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
      .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})

      .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});

  addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})

      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
      .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
      .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)

      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
      .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})

      .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
      .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
      .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)

      .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});

  addRulesForGOpcs({G_STORE})
      // addrspace(0)
      .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
      .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
      .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
      .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})

      // addrspace(1), there are no stores to addrspace(4)
      // For targets:
      // - with "+flat-for-global" - global_store
      // - without (-flat-for-global) - buffer_store addr64
      .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
      .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
      .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})

      // For UniP1, use an sgpr ptr to match flat-for-global patterns. Targets:
      // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
      // - without (-flat-for-global) - need sgpr ptr to select buffer_store
      .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
      .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
      .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})

      // addrspace(3) and addrspace(5)
      .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
      .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
      .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
      .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
  // clang-format on
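
  // Summary of the G_LOAD rules above: a uniform load from addrspace(1) or
  // addrspace(4) becomes a scalar s_load only when its MMO satisfies isUL and
  // the alignment predicate used for that size and target (isNaturalAligned
  // or isAlign4); every other uniform load and all divergent loads stay on
  // vector load instructions, with UniInVgpr* marking results that are
  // nevertheless uniform.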

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
                    G_AMDGPU_TBUFFER_LOAD_FORMAT},
                   StandardB)

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
      .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});

  addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
      .Uni(S64, {{Sgpr64}, {}});

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}},
           hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})

  addRulesForGOpcs({G_FPTOUI})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  addRulesForGOpcs({G_UITOFP})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  using namespace Intrinsic;

  addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});

  // This is an "intrinsic lane mask"; it was set to i32/i64 in llvm-ir.
  addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});

  addRulesForIOpcs({amdgcn_if_break}, Standard)
      .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});

  addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
      .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_readfirstlane})
      .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
      // This should not exist in the first place; it comes from call lowering,
      // readfirstlane-ing just in case the register is not in an sgpr.
      .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});

} // end initialize rules