enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;
    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {

    int64_t getIntModifiersOperand() const {

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers()) &&
             "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
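
    // Note on the helpers above: an operand carries either the floating-point
    // modifiers (abs/neg) or the integer sext modifier, never both; the assert
    // in getModifiersOperand() enforces this before dispatching to
    // getFPModifiersOperand() or getIntModifiersOperand().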
181 ImmKindTyMandatoryLiteral,
195 mutable ImmKindTy
Kind;
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override { return Kind == Immediate; }

  void setImmKindNone() const { Imm.Kind = ImmKindTyNone; }
  void setImmKindLiteral() const { Imm.Kind = ImmKindTyLiteral; }
  void setImmKindMandatoryLiteral() const { Imm.Kind = ImmKindTyMandatoryLiteral; }
  void setImmKindConst() const { Imm.Kind = ImmKindTyConst; }

  bool IsImmKindLiteral() const { return isImm() && Imm.Kind == ImmKindTyLiteral; }
  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }
  bool isImmKindConst() const { return isImm() && Imm.Kind == ImmKindTyConst; }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;
  bool isRegKind() const {

  bool isReg() const override { return isRegKind() && !hasModifiers(); }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

    return isRegOrInline(RCID, type) || isLiteralImm(type);

  bool isRegOrImmWithInt16InputMods() const {

  bool isRegOrImmWithInt32InputMods() const {

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {

  bool isRegOrImmWithFP16InputMods() const {

  bool isRegOrImmWithFP32InputMods() const {

  bool isRegOrImmWithFP64InputMods() const {

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const { return isRegClass(AMDGPU::VGPR_32RegClassID); }

  bool isVReg32OrOff() const { return isOff() || isVReg32(); }

  bool isNull() const { return isRegKind() && getReg() == AMDGPU::SGPR_NULL; }

  bool isVRegWithInputMods() const;
  bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
  bool isImmTy(ImmTy ImmT) const {

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const { return isImm() && Imm.Type != ImmTyNone; }
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isA16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }
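
  // The one-line predicates above simply test the operand's immediate type;
  // they are what the generated matcher (AMDGPUGenAsmMatcher.inc) and the
  // custom operand parsers use to recognise named modifier operands such as
  // offen, idxen, gds or dmask.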
  bool isRegOrImm() const {

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }
  bool isSCSrcB16() const { return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); }

  bool isSCSrcV2B16() const {

  bool isSCSrcB32() const { return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); }
  bool isSCSrcB64() const { return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); }

  bool isBoolReg() const;

  bool isSCSrcF16() const { return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); }

  bool isSCSrcV2F16() const {

  bool isSCSrcF32() const { return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); }
  bool isSCSrcF64() const { return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); }

  bool isSSrcB32() const { return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); }
  bool isSSrcB16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {

  bool isSSrcB64() const { return isSCSrcB64() || isLiteralImm(MVT::i64); }
  bool isSSrcF32() const { return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); }
  bool isSSrcF64() const { return isSCSrcB64() || isLiteralImm(MVT::f64); }
  bool isSSrcF16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {

  bool isSSrcV2FP32() const {

  bool isSCSrcV2FP32() const {

  bool isSSrcV2INT32() const {

  bool isSCSrcV2INT32() const {

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
  bool isVCSrcB32() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); }
  bool isVCSrcB64() const { return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); }
  bool isVCSrcTB16_Lo128() const { return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16); }
  bool isVCSrcB16() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); }

  bool isVCSrcV2B16() const {

  bool isVCSrcF32() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); }
  bool isVCSrcF64() const { return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); }
  bool isVCSrcTF16_Lo128() const { return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16); }
  bool isVCSrcF16() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); }

  bool isVCSrcV2F16() const {

  bool isVSrcB32() const { return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); }
  bool isVSrcB64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
  bool isVSrcTB16_Lo128() const { return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16); }
  bool isVSrcB16() const { return isVCSrcB16() || isLiteralImm(MVT::i16); }
  bool isVSrcV2B16() const { return isVSrcB16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const {

  bool isVSrcV2FP32() const { return isVSrcF64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrcV2INT32() const {

  bool isVSrcV2INT32() const { return isVSrcB64() || isLiteralImm(MVT::v2i32); }

  bool isVSrcF32() const { return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); }
  bool isVSrcF64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
  bool isVSrcTF16_Lo128() const { return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16); }
  bool isVSrcF16() const { return isVCSrcF16() || isLiteralImm(MVT::f16); }
  bool isVSrcV2F16() const { return isVSrcF16() || isLiteralImm(MVT::v2f16); }
  bool isVISrcB32() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); }
  bool isVISrcB16() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); }

  bool isVISrcV2B16() const {

  bool isVISrcF32() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); }
  bool isVISrcF16() const { return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); }
  bool isVISrcV2F16() const { return isVISrcF16() || isVISrcB32(); }

  bool isVISrc_64B64() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); }
  bool isVISrc_64F64() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); }
  bool isVISrc_64V2FP32() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); }
  bool isVISrc_64V2INT32() const { return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); }
  bool isVISrc_256B64() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); }
  bool isVISrc_256F64() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); }
  bool isVISrc_128B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); }
  bool isVISrc_128V2B16() const { return isVISrc_128B16(); }
  bool isVISrc_128B32() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); }
  bool isVISrc_128F32() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); }
  bool isVISrc_256V2FP32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); }
  bool isVISrc_256V2INT32() const { return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); }
  bool isVISrc_512B32() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); }
  bool isVISrc_512B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); }
  bool isVISrc_512V2B16() const { return isVISrc_512B16(); }
  bool isVISrc_512F32() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); }
  bool isVISrc_512F16() const { return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); }
  bool isVISrc_512V2F16() const { return isVISrc_512F16() || isVISrc_512B32(); }
  bool isVISrc_1024B32() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); }
  bool isVISrc_1024B16() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); }
  bool isVISrc_1024V2B16() const { return isVISrc_1024B16(); }
  bool isVISrc_1024F32() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); }
  bool isVISrc_1024F16() const { return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); }
  bool isVISrc_1024V2F16() const { return isVISrc_1024F16() || isVISrc_1024B32(); }

  bool isAISrcB32() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); }
  bool isAISrcB16() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); }

  bool isAISrcV2B16() const {

  bool isAISrcF32() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); }
  bool isAISrcF16() const { return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); }
  bool isAISrcV2F16() const { return isAISrcF16() || isAISrcB32(); }

  bool isAISrc_64B64() const { return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); }
  bool isAISrc_64F64() const { return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); }
  bool isAISrc_128B32() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); }
  bool isAISrc_128B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); }
  bool isAISrc_128V2B16() const { return isAISrc_128B16(); }
  bool isAISrc_128F32() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); }
  bool isAISrc_128F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); }
  bool isAISrc_128V2F16() const { return isAISrc_128F16() || isAISrc_128B32(); }

  bool isVISrc_128F16() const { return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); }
  bool isVISrc_128V2F16() const { return isVISrc_128F16() || isVISrc_128B32(); }

  bool isAISrc_256B64() const { return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); }
  bool isAISrc_256F64() const { return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); }
  bool isAISrc_512B32() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); }
  bool isAISrc_512B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); }
  bool isAISrc_512V2B16() const { return isAISrc_512B16(); }
  bool isAISrc_512F32() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); }
  bool isAISrc_512F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); }
  bool isAISrc_512V2F16() const { return isAISrc_512F16() || isAISrc_512B32(); }
  bool isAISrc_1024B32() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); }
  bool isAISrc_1024B16() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); }
  bool isAISrc_1024V2B16() const { return isAISrc_1024B16(); }
  bool isAISrc_1024F32() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); }
  bool isAISrc_1024F16() const { return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); }
  bool isAISrc_1024V2F16() const { return isAISrc_1024F16() || isAISrc_1024B32(); }
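
  // Naming convention of the source-operand predicates above, as the bodies
  // show: SCSrc* accepts an SGPR or an inline constant, VCSrc* accepts a
  // VGPR/SGPR (VS_* classes) or an inline constant, VSrc* additionally allows
  // a literal immediate, VISrc* accepts a VGPR or an inline constant, and
  // AISrc* accepts an AGPR or an inline constant. The B16/B32/F16/... suffix
  // names the element type and the _64/_128/.../_1024 infix the register
  // width in bits.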
  bool isKImmFP32() const { return isLiteralImm(MVT::f32); }
  bool isKImmFP16() const { return isLiteralImm(MVT::f16); }

  bool isMem() const override {

  bool isExpr() const {

  bool isSoppBrTarget() const { return isExpr() || isImm(); }
  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayAlu() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;
  int64_t getImm() const {

  void setImm(int64_t Val) {

  ImmTy getImmTy() const {

  unsigned getReg() const override {

    return SMRange(StartLoc, EndLoc);

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));

  bool hasModifiers() const { return getModifiers().hasModifiers(); }
  bool hasFPModifiers() const { return getModifiers().hasFPModifiers(); }
  bool hasIntModifiers() const { return getModifiers().hasIntModifiers(); }
  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
      addRegOperands(Inst, N);
      addImmOperands(Inst, N);

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
      addRegOperands(Inst, N);
      addImmOperands(Inst, N, false);

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    addRegOperands(Inst, N);

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
      addImmOperands(Inst, N);
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';

      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      OS << " mods: " << Imm.Mods << '>';

      OS << '\'' << getToken() << '\'';

      OS << "<expr " << *Expr << '>';
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);

  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
                        VgprIndexUnusedMin);

  void usesAgprAt(int i) {
    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
                          VgprIndexUnusedMin);

  KernelScopeInfo() = default;

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
      usesAgprAt(AgprIndexUnusedMin = -1);

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
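
// KernelScopeInfo keeps a running "first unused" index per register file;
// usesRegister() converts a parsed register into the index of its last 32-bit
// dword (DwordRegIndex + divideCeil(RegWidth, 32) - 1) so the per-kernel
// SGPR/VGPR/AGPR usage counters reflect every register the source references.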
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);

  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseDirectiveAMDKernelCodeT();
  bool ParseDirectiveAMDGPUHsaKernel();
  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
  bool ParseRegRange(unsigned &Num, unsigned &Width);
  unsigned getRegularReg(RegisterKind RegKind,
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                       bool IsGdsHardcoded);

  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY

    OperandMode_Default,

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
    if (getFeatureBits().none()) {

      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];

  bool hasSGPR102_SGPR103() const {

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }

  StringRef getMatchedVariantName() const;
  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
                     bool RestoreOnFailure);
                     SMLoc &EndLoc) override;
                     SMLoc &EndLoc) override;
                                        unsigned Kind) override;
                               bool MatchingInlineAsm) override;
                    OperandMode Mode = OperandMode_Default);
                    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                    std::function<bool(int64_t &)> ConvertResult = nullptr);

  parseOperandArrayWithPrefix(const char *Prefix,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t &) = nullptr);

                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();

  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  bool parseCnt(int64_t &IntVal);
  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  bool parseDelay(int64_t &Delay);
  struct OperandInfoTy {
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);
  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      bool SearchMandatoryLiterals = false) const;

  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateVOPDRegBankConstraints(const MCInst &Inst,
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
                        const SMLoc &IDLoc);
  std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  AsmToken peekToken(bool ShouldSkipSpace = true);
  SMLoc getLoc() const;
  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
  bool parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  int64_t parseGPRIdxMacro();
  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMEMOffsetMod() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

               OptionalImmIndexMap &OptionalIdx);
               OptionalImmIndexMap &OptionalIdx);
               OptionalImmIndexMap &OptionalIdx);

               bool IsAtomic = false);

  bool parseDimId(unsigned &Encoding);
  bool convertDppBoundCtrl(int64_t &BoundCtrl);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultDppRowMask() const;
  AMDGPUOperand::Ptr defaultDppBankMask() const;
  AMDGPUOperand::Ptr defaultDppBoundCtrl() const;
  AMDGPUOperand::Ptr defaultDppFI() const;
                    bool IsDPP8 = false);
                            AMDGPUOperand::ImmTy Type);
                     bool SkipDstVcc = false,
                     bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
  AMDGPUOperand::Ptr defaultWaitVDST() const;
  AMDGPUOperand::Ptr defaultWaitEXP() const;
    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();

      return &APFloat::IEEEsingle();
      return &APFloat::IEEEdouble();
      return &APFloat::IEEEhalf();

                                 APFloat::rmNearestTiesToEven,
  if (Status != APFloat::opOK &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  if (!isImmTy(ImmTyNone)) {

    if (type == MVT::f64 || type == MVT::i64) {
                                       AsmParser->hasInv2PiInlineImm());

          static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
          type, AsmParser->hasInv2PiInlineImm());

        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());

  if (type == MVT::f64 || type == MVT::i64) {
                                     AsmParser->hasInv2PiInlineImm());

        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        type, AsmParser->hasInv2PiInlineImm());

      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {

    if (type == MVT::f64 && hasFPModifiers()) {

  if (type == MVT::f64) {

  if (type == MVT::i64) {

  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 :
                     (type == MVT::v2i16) ? MVT::i16 :
                     (type == MVT::v2f32) ? MVT::f32 : type;
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);

bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
  else if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);

bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
    addLiteralImmOperand(Inst, Imm.Val,
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
    assert(!isImmTy(ImmTyNone) || !hasModifiers());

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());

  if (ApplyModifiers) {
    Val = applyInputFPModifiers(Val, Size);

  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

                               AsmParser->hasInv2PiInlineImm())) {

      if (Literal.getLoBits(32) != 0) {
        const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");

      setImmKindLiteral();

                       APFloat::rmNearestTiesToEven, &lost);
    uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      setImmKindMandatoryLiteral();
      setImmKindLiteral();

                               AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();
    setImmKindLiteral();

                               AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();

                                        AsmParser->hasInv2PiInlineImm()));
    setImmKindMandatoryLiteral();
    setImmKindMandatoryLiteral();
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  setImmKindMandatoryLiteral();

                     APFloat::rmNearestTiesToEven, &Lost);

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {

bool AMDGPUOperand::isInlineValue() const {
  if (Is == IS_VGPR) {
      return AMDGPU::VGPR_32RegClassID;
      return AMDGPU::VReg_64RegClassID;
      return AMDGPU::VReg_96RegClassID;
      return AMDGPU::VReg_128RegClassID;
      return AMDGPU::VReg_160RegClassID;
      return AMDGPU::VReg_192RegClassID;
      return AMDGPU::VReg_224RegClassID;
      return AMDGPU::VReg_256RegClassID;
      return AMDGPU::VReg_288RegClassID;
      return AMDGPU::VReg_320RegClassID;
      return AMDGPU::VReg_352RegClassID;
      return AMDGPU::VReg_384RegClassID;
      return AMDGPU::VReg_512RegClassID;
      return AMDGPU::VReg_1024RegClassID;
  } else if (Is == IS_TTMP) {
      return AMDGPU::TTMP_32RegClassID;
      return AMDGPU::TTMP_64RegClassID;
      return AMDGPU::TTMP_128RegClassID;
      return AMDGPU::TTMP_256RegClassID;
      return AMDGPU::TTMP_512RegClassID;
  } else if (Is == IS_SGPR) {
      return AMDGPU::SGPR_32RegClassID;
      return AMDGPU::SGPR_64RegClassID;
      return AMDGPU::SGPR_96RegClassID;
      return AMDGPU::SGPR_128RegClassID;
      return AMDGPU::SGPR_160RegClassID;
      return AMDGPU::SGPR_192RegClassID;
      return AMDGPU::SGPR_224RegClassID;
      return AMDGPU::SGPR_256RegClassID;
      return AMDGPU::SGPR_288RegClassID;
      return AMDGPU::SGPR_320RegClassID;
      return AMDGPU::SGPR_352RegClassID;
      return AMDGPU::SGPR_384RegClassID;
      return AMDGPU::SGPR_512RegClassID;
  } else if (Is == IS_AGPR) {
      return AMDGPU::AGPR_32RegClassID;
      return AMDGPU::AReg_64RegClassID;
      return AMDGPU::AReg_96RegClassID;
      return AMDGPU::AReg_128RegClassID;
      return AMDGPU::AReg_160RegClassID;
      return AMDGPU::AReg_192RegClassID;
      return AMDGPU::AReg_224RegClassID;
      return AMDGPU::AReg_256RegClassID;
      return AMDGPU::AReg_288RegClassID;
      return AMDGPU::AReg_320RegClassID;
      return AMDGPU::AReg_352RegClassID;
      return AMDGPU::AReg_384RegClassID;
      return AMDGPU::AReg_512RegClassID;
      return AMDGPU::AReg_1024RegClassID;
      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R)
    return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();

bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
  return ParseRegister(RegNo, StartLoc, EndLoc, false);

  ParseRegister(RegNo, StartLoc, EndLoc, true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
    Error(Loc, "register does not fit in the list");
    if (Reg1 != Reg + RegWidth / 32) {
      Error(Loc, "registers in a list must have consecutive indices");
  {{"ttmp"}, IS_TTMP},

  return Kind == IS_VGPR ||

    if (Str.startswith(Reg.Name))

  return !Str.getAsInteger(10, Num);

AMDGPUAsmParser::isRegister(const AsmToken &Token,

  if (!RegSuffix.empty()) {

AMDGPUAsmParser::isRegister()
  return isRegister(getToken(), peekToken());
AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    AlignSize = std::min(RegWidth / 32, 4u);

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;

  unsigned RegIdx = RegNum / AlignSize;
    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;

    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
  int64_t RegLo, RegHi;

  SMLoc FirstIdxLoc = getLoc();
  if (!parseExpr(RegLo))

    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);
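
// A minimal sketch of what ParseRegRange() accepts, assuming the usual AMDGPU
// range syntax such as "s[0:3]" or "v[4:7]": both indices are parsed as
// absolute expressions, must fit in 32 bits, and must be ordered; the width is
// then 32 * ((RegHi - RegLo) + 1) bits, so "s[0:3]" yields a 128-bit group
// starting at index 0.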
unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
    RegKind = IS_SPECIAL;

unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
  auto Loc = getLoc();
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;

  if (!RegSuffix.empty()) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;

    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

                "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;

    auto Loc = getLoc();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
      return AMDGPU::NoRegister;
    if (RegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;

    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
      return AMDGPU::NoRegister;
    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;

                "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;

    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);

  if (Reg == AMDGPU::NoRegister) {

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
      Error(Loc, "register not available on this GPU");

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure) {
  Reg = AMDGPU::NoRegister;

  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
    return StringRef(".amdgcn.next_free_vgpr");
    return StringRef(".amdgcn.next_free_sgpr");
    return std::nullopt;

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
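
// The .amdgcn.next_free_vgpr / .amdgcn.next_free_sgpr symbols are treated as
// running maxima: updateGprCountSymbols() computes the last dword index a
// parsed register touches and only updates the symbol when that index reaches
// or exceeds the previously recorded count; the symbols must remain variable
// and absolute for this bookkeeping to work.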
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
  const auto &Tok = getToken();
  const auto &NextTok = peekToken();

  bool Negate = false;

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
      RealVal.changeSign();

        AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));

    if (HasSP3AbsModifier) {
      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));

  if (auto R = parseRegister()) {
  } else if (isModifier()) {
    return parseImm(Operands, HasSP3AbsMod);
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return str == "abs" || str == "neg" || str == "sext";

AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                         const AsmToken &NextToken) const {

AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                   const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);

AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);

AMDGPUAsmParser::isModifier() {
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);

AMDGPUAsmParser::parseSP3NegModifier() {
  peekTokens(NextToken);
      (isRegister(NextToken[0], NextToken[1]) ||
       isId(NextToken[0], "abs"))) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");

  Abs = trySkipId("abs");
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");

  Res = parseRegOrImm(Operands, SP3Abs);

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);

  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))

  AMDGPUOperand::Modifiers Mods;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);

  return parseRegOrImmWithFPInputMods(Operands, false);

  return parseRegOrImmWithIntInputMods(Operands, false);
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
    Operands.push_back(std::move(Reg));
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
    return Match_InvalidOperand;

      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
      if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
        return Match_InvalidOperand;

  return Match_Success;
  static const unsigned Variants[] = {

  if (isForcedDPP() && isForcedVOP3()) {
  if (getForcedEncodingSize() == 32) {
  if (isForcedVOP3()) {
  if (isForcedSDWA()) {
  if (isForcedDPP()) {

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
  if (getForcedEncodingSize() == 32)
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  case AMDGPU::FLAT_SCR:
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  return AMDGPU::NoRegister;

bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  int64_t Val = MO.getImm();
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:

                          bool AddMandatoryLiterals = false) {
  int16_t ImmDeferredIdx =
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;

bool AMDGPUAsmParser::validateConstantBusLimitations(
  const unsigned Opcode = Inst.getOpcode();
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;

  for (int OpIdx : OpIndices) {
    if (usesConstantBus(Inst, OpIdx)) {
      if (SGPRsUsed.insert(LastSGPR).second) {
        ++ConstantBusUseCount;

      if (NumLiterals == 0) {
      } else if (LiteralSize != Size) {

  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))

  Error(Loc, "invalid operand (violates constant bus restrictions)");
bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
  const unsigned Opcode = Inst.getOpcode();

  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {

  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
  if (!InvalidCompOprIdx)

  auto CompOprIdx = *InvalidCompOprIdx;
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));

  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    Error(Loc, "one dst register must be even and the other odd");
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
          " operands must use different VGPR banks");
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;

  bool IsPackedD16 = false;
    IsPackedD16 = D16Idx >= 0;
      DataSize = (DataSize + 1) / 2;

  if ((VDataSize / 4) == DataSize + TFESize)

    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
  assert(SrsrcIdx > VAddr0Idx);

  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx

  unsigned ExpectedAddrSize =

    if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
      int VAddrLastIdx = SrsrcIdx - 1;
      unsigned VAddrLastSize =

      return VAddrLastIdx - VAddr0Idx + VAddrLastSize == ExpectedAddrSize;

    if (ExpectedAddrSize > 12)
      ExpectedAddrSize = 16;

    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))

  return ActualAddrSize == ExpectedAddrSize;
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;

bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  if (!BaseOpcode->MSAA)
  return DimInfo->MSAA;
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:

bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
  Error(ErrLoc, "source operand must be a VGPR");

bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
        "source operand must be either a VGPR or an inline constant");
3851bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
3857 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3864 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
3866 "inline constants are not allowed for this operand");
3873 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3891 if (Src2Reg == DstReg)
3895   if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3898   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3900 "source 2 operand must not partially overlap with dst");
3907 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3911 case V_DIV_SCALE_F32_gfx6_gfx7:
3912 case V_DIV_SCALE_F32_vi:
3913 case V_DIV_SCALE_F32_gfx10:
3914 case V_DIV_SCALE_F64_gfx6_gfx7:
3915 case V_DIV_SCALE_F64_vi:
3916 case V_DIV_SCALE_F64_gfx10:
3922   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3923                     AMDGPU::OpName::src2_modifiers,
3924                     AMDGPU::OpName::src2_modifiers}) {
3935 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3955 case AMDGPU::V_SUBREV_F32_e32:
3956 case AMDGPU::V_SUBREV_F32_e64:
3957 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3958 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3959 case AMDGPU::V_SUBREV_F32_e32_vi:
3960 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3961 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3962 case AMDGPU::V_SUBREV_F32_e64_vi:
3964 case AMDGPU::V_SUBREV_CO_U32_e32:
3965 case AMDGPU::V_SUBREV_CO_U32_e64:
3966 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3967 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3969 case AMDGPU::V_SUBBREV_U32_e32:
3970 case AMDGPU::V_SUBBREV_U32_e64:
3971 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3972 case AMDGPU::V_SUBBREV_U32_e32_vi:
3973 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3974 case AMDGPU::V_SUBBREV_U32_e64_vi:
3976 case AMDGPU::V_SUBREV_U32_e32:
3977 case AMDGPU::V_SUBREV_U32_e64:
3978 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3979 case AMDGPU::V_SUBREV_U32_e32_vi:
3980 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3981 case AMDGPU::V_SUBREV_U32_e64_vi:
3983 case AMDGPU::V_SUBREV_F16_e32:
3984 case AMDGPU::V_SUBREV_F16_e64:
3985 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3986 case AMDGPU::V_SUBREV_F16_e32_vi:
3987 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3988 case AMDGPU::V_SUBREV_F16_e64_vi:
3990 case AMDGPU::V_SUBREV_U16_e32:
3991 case AMDGPU::V_SUBREV_U16_e64:
3992 case AMDGPU::V_SUBREV_U16_e32_vi:
3993 case AMDGPU::V_SUBREV_U16_e64_vi:
3995 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3996 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3997 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3999 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4000 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4002 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4003 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4005 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4006 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4008 case AMDGPU::V_LSHRREV_B32_e32:
4009 case AMDGPU::V_LSHRREV_B32_e64:
4010 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4011 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4012 case AMDGPU::V_LSHRREV_B32_e32_vi:
4013 case AMDGPU::V_LSHRREV_B32_e64_vi:
4014 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4015 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4017 case AMDGPU::V_ASHRREV_I32_e32:
4018 case AMDGPU::V_ASHRREV_I32_e64:
4019 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4020 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4021 case AMDGPU::V_ASHRREV_I32_e32_vi:
4022 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4023 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4024 case AMDGPU::V_ASHRREV_I32_e64_vi:
4026 case AMDGPU::V_LSHLREV_B32_e32:
4027 case AMDGPU::V_LSHLREV_B32_e64:
4028 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4029 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4030 case AMDGPU::V_LSHLREV_B32_e32_vi:
4031 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4032 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4033 case AMDGPU::V_LSHLREV_B32_e64_vi:
4035 case AMDGPU::V_LSHLREV_B16_e32:
4036 case AMDGPU::V_LSHLREV_B16_e64:
4037 case AMDGPU::V_LSHLREV_B16_e32_vi:
4038 case AMDGPU::V_LSHLREV_B16_e64_vi:
4039 case AMDGPU::V_LSHLREV_B16_gfx10:
4041 case AMDGPU::V_LSHRREV_B16_e32:
4042 case AMDGPU::V_LSHRREV_B16_e64:
4043 case AMDGPU::V_LSHRREV_B16_e32_vi:
4044 case AMDGPU::V_LSHRREV_B16_e64_vi:
4045 case AMDGPU::V_LSHRREV_B16_gfx10:
4047 case AMDGPU::V_ASHRREV_I16_e32:
4048 case AMDGPU::V_ASHRREV_I16_e64:
4049 case AMDGPU::V_ASHRREV_I16_e32_vi:
4050 case AMDGPU::V_ASHRREV_I16_e64_vi:
4051 case AMDGPU::V_ASHRREV_I16_gfx10:
4053 case AMDGPU::V_LSHLREV_B64_e64:
4054 case AMDGPU::V_LSHLREV_B64_gfx10:
4055 case AMDGPU::V_LSHLREV_B64_vi:
4057 case AMDGPU::V_LSHRREV_B64_e64:
4058 case AMDGPU::V_LSHRREV_B64_gfx10:
4059 case AMDGPU::V_LSHRREV_B64_vi:
4061 case AMDGPU::V_ASHRREV_I64_e64:
4062 case AMDGPU::V_ASHRREV_I64_gfx10:
4063 case AMDGPU::V_ASHRREV_I64_vi:
4065 case AMDGPU::V_PK_LSHLREV_B16:
4066 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4067 case AMDGPU::V_PK_LSHLREV_B16_vi:
4069 case AMDGPU::V_PK_LSHRREV_B16:
4070 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4071 case AMDGPU::V_PK_LSHRREV_B16_vi:
4072 case AMDGPU::V_PK_ASHRREV_I16:
4073 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4074 case AMDGPU::V_PK_ASHRREV_I16_vi:
4081 std::optional<StringRef>
4082 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4084 using namespace SIInstrFlags;
4085   const unsigned Opcode = Inst.getOpcode();
4091   if ((Desc.TSFlags & Enc) == 0)
4092 return std::nullopt;
4094   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4099 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4102         return StringRef("lds_direct is not supported on this GPU");
4105         return StringRef("lds_direct cannot be used with this instruction");
4107       if (SrcName != OpName::src0)
4108         return StringRef("lds_direct may be used as src0 only");
4112 return std::nullopt;
4116   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4117     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4118     if (Op.isFlatOffset())
4119 return Op.getStartLoc();
4124 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4135   if (!hasFlatOffsets() && Op.getImm() != 0) {
4137 "flat offset modifier is not supported on this GPU");
4144 bool AllowNegative =
4146   if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4148           Twine("expected a ") +
4149           (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4150                          : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4159   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4160     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4161     if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4162 return Op.getStartLoc();
4167 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4192         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4193                              : "expected a 21-bit signed offset");
4198 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4207 const int OpIndices[] = { Src0Idx, Src1Idx };
4209 unsigned NumExprs = 0;
4210 unsigned NumLiterals = 0;
4213   for (int OpIdx : OpIndices) {
4214     if (OpIdx == -1) break;
4219     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4221       if (NumLiterals == 0 || LiteralValue != Value) {
4225     } else if (MO.isExpr()) {
4231 return NumLiterals + NumExprs <= 1;
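// Note (summary of the check above): across src0 and src1 of a SOP
// instruction, at most one distinct literal value or relocatable expression
// may be encoded.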
4234 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4248 if (OpSelIdx != -1) {
4253 if (OpSelHiIdx != -1) {
4271 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4285     Error(S, "64 bit dpp only supports row_newbcast");
4294 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4295   auto FB = getFeatureBits();
4296   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4297          (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
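// --- Illustrative sketch, not part of AMDGPUAsmParser.cpp ---
// The same condition with the feature test abstracted away (assumes exactly
// one wave mode is active): wave64 requires the full vcc pair, wave32
// requires vcc_lo. Helper name and parameters are assumptions.
static bool vccOperandOk(bool IsWave64, bool RegIsVcc, bool RegIsVccLo) {
  return IsWave64 ? RegIsVcc : RegIsVccLo;
}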
4301 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4307       !HasMandatoryLiteral && !isVOPD(Opcode))
4312 unsigned NumExprs = 0;
4313 unsigned NumLiterals = 0;
4316   for (int OpIdx : OpIndices) {
4326     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4328       if (NumLiterals == 0 || LiteralValue != Value) {
4332     } else if (MO.isExpr()) {
4336 NumLiterals += NumExprs;
4341 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4342     Error(getLitLoc(Operands), "literal operands are not supported");
4346   if (NumLiterals > 1) {
4347     Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4365   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4366   auto Reg = Sub ? Sub : Op.getReg();
4368   return AGPR32.contains(Reg) ? 1 : 0;
4371 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4379 : AMDGPU::OpName::vdata;
4387 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4391 auto FB = getFeatureBits();
4392 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4393 if (DataAreg < 0 || DstAreg < 0)
4395 return DstAreg == DataAreg;
4398 return DstAreg < 1 && DataAreg < 1;
4401 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4402 auto FB = getFeatureBits();
4403 if (!FB[AMDGPU::FeatureGFX90AInsts])
4414     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4418     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4420     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
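// --- Illustrative sketch, not part of AMDGPUAsmParser.cpp ---
// The gfx90a rule applied above: a VGPR or AGPR tuple of two or more 32-bit
// registers must start at an even register index. Assumed standalone helper
// taking a zero-based register index and the tuple size in 32-bit registers.
static bool gfx90aTupleAligned(unsigned RegIdx, unsigned NumRegs) {
  return NumRegs < 2 || (RegIdx & 1) == 0;
}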
4428   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4429     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4431 return Op.getStartLoc();
4436 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4446   auto FB = getFeatureBits();
4447   bool UsesNeg = false;
4448 if (FB[AMDGPU::FeatureGFX940Insts]) {
4450 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4451 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4452 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4453 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4458 if (IsNeg == UsesNeg)
4462         UsesNeg ? "invalid modifier: blgp is not supported"
4463                 : "invalid modifier: neg is not supported");
4468 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4474 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4475 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4476 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4477 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4483 if (Reg == AMDGPU::SGPR_NULL)
4487   Error(RegLoc, "src0 must be null");
4493 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4495 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4499 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4500 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4509   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4512     Error(RegLoc, "vgpr must be even aligned");
4519 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4521                                             const SMLoc &IDLoc) {
4523 AMDGPU::OpName::cpol);
4533     Error(S, "cache policy is not supported for SMRD instructions");
4537     Error(IDLoc, "invalid cache policy for SMEM instruction");
4546     Error(S, "scc is not supported on this GPU");
4556                    : "instruction must use glc");
4564         &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4566                    : "instruction must not use glc");
4578 if (!Operand->isReg())
4580 unsigned Reg = Operand->getReg();
4581 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4583 "execz and vccz are not supported on this GPU");
4590 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4597     Error(Loc, "TFE modifier has no meaning for store instructions");
4605 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4608   if (auto ErrMsg = validateLdsDirect(Inst)) {
4612 if (!validateSOPLiteral(Inst)) {
4614 "only one unique literal operand is allowed");
4617   if (!validateVOPLiteral(Inst, Operands)) {
4620   if (!validateConstantBusLimitations(Inst, Operands)) {
4623   if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4626 if (!validateIntClampSupported(Inst)) {
4628 "integer clamping is not supported on this GPU");
4631 if (!validateOpSel(Inst)) {
4633 "invalid op_sel operand");
4636   if (!validateDPP(Inst, Operands)) {
4640 if (!validateMIMGD16(Inst)) {
4642 "d16 modifier is not supported on this GPU");
4645 if (!validateMIMGMSAA(Inst)) {
4647 "invalid dim; must be MSAA type");
4650 if (!validateMIMGDataSize(Inst, IDLoc)) {
4653 if (!validateMIMGAddrSize(Inst)) {
4655 "image address size does not match dim and a16");
4658 if (!validateMIMGAtomicDMask(Inst)) {
4660 "invalid atomic image dmask");
4663 if (!validateMIMGGatherDMask(Inst)) {
4665 "invalid image_gather dmask: only one bit must be set");
4668   if (!validateMovrels(Inst, Operands)) {
4671   if (!validateFlatOffset(Inst, Operands)) {
4674   if (!validateSMEMOffset(Inst, Operands)) {
4677   if (!validateMAIAccWrite(Inst, Operands)) {
4680   if (!validateMAISrc2(Inst, Operands)) {
4683   if (!validateMFMA(Inst, Operands)) {
4686   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4690   if (!validateAGPRLdSt(Inst)) {
4691     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4692              ? "invalid register class: data and dst should be all VGPR or AGPR"
4693              : "invalid register class: agpr loads and stores not supported on this GPU"
4697 if (!validateVGPRAlign(Inst)) {
4699 "invalid register class: vgpr tuples must be 64 bit aligned");
4702   if (!validateGWS(Inst, Operands)) {
4706   if (!validateBLGP(Inst, Operands)) {
4710   if (!validateDivScale(Inst)) {
4711     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4714   if (!validateWaitCnt(Inst, Operands)) {
4717   if (!validateExeczVcczOperands(Operands)) {
4720   if (!validateTFE(Inst, Operands)) {
4729 unsigned VariantID = 0);
4733 unsigned VariantID);
4735 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4740 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4743   for (auto Variant : Variants) {
4751 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4752                                                   const SMLoc &IDLoc) {
4753 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4756 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4761 getParser().clearPendingErrors();
4765 StringRef VariantName = getMatchedVariantName();
4766   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4769                        " variant of this instruction is not supported"));
4773   if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4774       !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4777     FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4778         .flip(AMDGPU::FeatureWavefrontSize32);
4780 ComputeAvailableFeatures(FeaturesWS32);
4782 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4783       return Error(IDLoc, "instruction requires wavesize=32");
4788     return Error(IDLoc, "instruction not supported on this GPU");
4793   return Error(IDLoc, "invalid instruction" + Suggestion);
4799   const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4800   if (Op.isToken() && InvalidOprIdx > 1) {
4801     const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4802     return PrevOp.isToken() && PrevOp.getToken() == "::";
4807 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4811                                               bool MatchingInlineAsm) {
4813 unsigned Result = Match_Success;
4814   for (auto Variant : getMatchedVariants()) {
4816     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4821 if ((R == Match_Success) ||
4822 (R == Match_PreferE32) ||
4823 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4824 (R == Match_InvalidOperand && Result != Match_MissingFeature
4825 && Result != Match_PreferE32) ||
4826 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4827 && Result != Match_MissingFeature
4828 && Result != Match_PreferE32)) {
4832 if (R == Match_Success)
4836 if (Result == Match_Success) {
4837     if (!validateInstruction(Inst, IDLoc, Operands)) {
4846 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4852 case Match_MissingFeature:
4856     return Error(IDLoc, "operands are not valid for this GPU or mode");
4858 case Match_InvalidOperand: {
4859 SMLoc ErrorLoc = IDLoc;
4862         return Error(IDLoc, "too few operands for instruction");
4865       if (ErrorLoc == SMLoc())
4869       return Error(ErrorLoc, "invalid VOPDY instruction");
4871     return Error(ErrorLoc, "invalid operand for instruction");
4874 case Match_PreferE32:
4875     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4876                         "should be encoded as e32");
4877 case Match_MnemonicFail:
4883 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4888 if (getParser().parseAbsoluteExpression(Tmp)) {
4895 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4897   if (ParseAsAbsoluteExpression(Major))
4898     return TokError("invalid major version");
4901     return TokError("minor version number required, comma expected");
4903   if (ParseAsAbsoluteExpression(Minor))
4904     return TokError("invalid minor version");
4909 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4911     return TokError("directive only supported for amdgcn architecture");
4913 std::string TargetIDDirective;
4914 SMLoc TargetStart = getTok().getLoc();
4915 if (getParser().parseEscapedString(TargetIDDirective))
4919   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4920     return getParser().Error(TargetRange.Start,
4921         (Twine(".amdgcn_target directive's target id ") +
4922          Twine(TargetIDDirective) +
4923          Twine(" does not match the specified target id ") +
4924          Twine(getTargetStreamer().getTargetID()->toString())).str());
4929 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4930   return Error(Range.Start, "value out of range", Range);
4933 bool AMDGPUAsmParser::calculateGPRBlocks(
4934     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4935     bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
4936     unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
4937     SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4948 unsigned MaxAddressableNumSGPRs =
4951   if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4952       NumSGPRs > MaxAddressableNumSGPRs)
4953     return OutOfRangeError(SGPRRange);
4958   if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4959       NumSGPRs > MaxAddressableNumSGPRs)
4960     return OutOfRangeError(SGPRRange);
4962   if (Features.test(FeatureSGPRInitBug))
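// --- Illustrative sketch, not part of AMDGPUAsmParser.cpp ---
// The general shape of the block computation performed here: round the used
// register count up to the target's allocation granule, then encode it as
// "granules minus one". Granule selection and the special cases handled above
// (trap handler, xnack, flat scratch, SGPRInitBug) are omitted in this sketch.
static unsigned encodeGPRBlocks(unsigned NumGPRs, unsigned Granule) {
  unsigned Used = NumGPRs ? NumGPRs : 1; // at least one register is reported
  unsigned Rounded = (Used + Granule - 1) / Granule * Granule;
  return Rounded / Granule - 1;
}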
4973 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4975     return TokError("directive only supported for amdgcn architecture");
4978     return TokError("directive only supported for amdhsa OS");
4981 if (getParser().parseIdentifier(KernelName))
4998 unsigned ImpliedUserSGPRCount = 0;
5002 std::optional<unsigned> ExplicitUserSGPRCount;
5003   bool ReserveVCC = true;
5004   bool ReserveFlatScr = true;
5005 std::optional<bool> EnableWavefrontSize32;
5011 SMRange IDRange = getTok().getLocRange();
5012     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5015     if (ID == ".end_amdhsa_kernel")
5019       return TokError(".amdhsa_ directives cannot be repeated");
5021 SMLoc ValStart = getLoc();
5023 if (getParser().parseAbsoluteExpression(IVal))
5025 SMLoc ValEnd = getLoc();
5029 return OutOfRangeError(ValRange);
5033 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5034 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
5035 return OutOfRangeError(RANGE); \
5036 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
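// For reference (usage pattern taken from the directive handlers below; the
// kernel-descriptor field name is an assumption): each .amdhsa_* branch
// range-checks the parsed value against the field width and then stores it,
// e.g.
//   PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
//                    COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, ValRange);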
5038     if (ID == ".amdhsa_group_segment_fixed_size") {
5040         return OutOfRangeError(ValRange);
5042     } else if (ID == ".amdhsa_private_segment_fixed_size") {
5044         return OutOfRangeError(ValRange);
5046     } else if (ID == ".amdhsa_kernarg_size") {
5048         return OutOfRangeError(ValRange);
5050     } else if (ID == ".amdhsa_user_sgpr_count") {
5051       ExplicitUserSGPRCount = Val;
5052     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5055                    "directive is not supported with architected flat scratch",
5058                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5061       ImpliedUserSGPRCount += 4;
5062     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5064                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5067       ImpliedUserSGPRCount += 2;
5068     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5070                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5073       ImpliedUserSGPRCount += 2;
5074     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5076                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5079       ImpliedUserSGPRCount += 2;
5080     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5082                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5085       ImpliedUserSGPRCount += 2;
5086     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5089                    "directive is not supported with architected flat scratch",
5092                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5095       ImpliedUserSGPRCount += 2;
5096     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5098                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5101       ImpliedUserSGPRCount += 1;
5102     } else if (ID == ".amdhsa_wavefront_size32") {
5103       if (IVersion.Major < 10)
5104         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5105       EnableWavefrontSize32 = Val;
5107                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5109     } else if (ID == ".amdhsa_uses_dynamic_stack") {
5111                        KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5112     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5115                    "directive is not supported with architected flat scratch",
5118                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5119     } else if (ID == ".amdhsa_enable_private_segment") {
5123                    "directive is not supported without architected flat scratch",
5126                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5127     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5129                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5131     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5133                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5135     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5137                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5139     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5141                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5143     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5145                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5147     } else if (ID == ".amdhsa_next_free_vgpr") {
5148       VGPRRange = ValRange;
5150     } else if (ID == ".amdhsa_next_free_sgpr") {
5151       SGPRRange = ValRange;
5153     } else if (ID == ".amdhsa_accum_offset") {
5155         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5157     } else if (ID == ".amdhsa_reserve_vcc") {
5158       if (!isUInt<1>(Val))
5159         return OutOfRangeError(ValRange);
5161     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5162       if (IVersion.Major < 7)
5163         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5166                    "directive is not supported with architected flat scratch",
5168       if (!isUInt<1>(Val))
5169         return OutOfRangeError(ValRange);
5170       ReserveFlatScr = Val;
5171     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5172       if (IVersion.Major < 8)
5173         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5174       if (!isUInt<1>(Val))
5175         return OutOfRangeError(ValRange);
5176       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5177         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5179     } else if (ID == ".amdhsa_float_round_mode_32") {
5181                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5182     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5184                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5185     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5187                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5188     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5190                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5192     } else if (ID == ".amdhsa_dx10_clamp") {
5194                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5195     } else if (ID == ".amdhsa_ieee_mode") {
5198     } else if (ID == ".amdhsa_fp16_overflow") {
5199       if (IVersion.Major < 9)
5200         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5203     } else if (ID == ".amdhsa_tg_split") {
5205         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5208     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5209       if (IVersion.Major < 10)
5210         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5213     } else if (ID == ".amdhsa_memory_ordered") {
5214       if (IVersion.Major < 10)
5215         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5218     } else if (ID == ".amdhsa_forward_progress") {
5219       if (IVersion.Major < 10)
5220         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5223     } else if (ID == ".amdhsa_shared_vgpr_count") {
5224       if (IVersion.Major < 10)
5225         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5226       SharedVGPRCount = Val;
5228                        COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5230     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {