enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
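// Rough meaning of the kinds above (informal summary): IS_VGPR and IS_SGPR are
// the ordinary vector/scalar register files, IS_AGPR are the accumulation
// registers used by MAI/MFMA instructions, IS_TTMP are the trap handler
// temporaries, and IS_SPECIAL covers named registers such as vcc, exec, m0 or
// flat_scratch that are matched by name rather than by index.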
  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;
    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers()) &&
             "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
      return 0;
    }
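    // Note on the encoding above (a sketch, not normative): the floating-point
    // source modifiers |...| / -... and the integer modifier sext(...) are
    // mutually exclusive on a single operand, and getModifiersOperand() folds
    // whichever set is present into the src*_modifiers immediate that precedes
    // the actual source operand in the emitted MCInst.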
    ImmKindTyMandatoryLiteral,

  mutable ImmKindTy Kind;
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }
  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }
  bool isVRegWithInputMods() const;
  bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isA16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }
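  // Loosely, each predicate above matches one named assembly modifier, e.g.
  // "offset:4095" parses to an ImmTyOffset operand, "gds" to ImmTyGDS and
  // "row_mask:0xf" to ImmTyDppRowMask; the width checks (isUInt<16>,
  // isUInt<8>, isUInt<7>) reject values that do not fit the corresponding
  // instruction encoding field.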
  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcTB16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcTF16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }
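  // Informal summary of the naming convention used by the source predicates
  // above: "SCSrc" accepts an SGPR or an inline constant, "SSrc" additionally
  // accepts a 32-bit literal; "VCSrc" accepts a VGPR, an SGPR or an inline
  // constant, "VSrc" additionally accepts a literal; "VISrc" and "AISrc"
  // accept a VGPR respectively AGPR tuple or an inline constant. The trailing
  // B/F plus a width names the expected element type, and "V2" marks the
  // packed two-element forms.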
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayAlu() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;
  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }
  void addImmOperands(MCInst &Inst, unsigned N,
                      bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val,
                            bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N, false);
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else
      Inst.addOperand(MCOperand::createExpr(Expr));
  }
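  // Roughly how the *WithInputModsOperands helpers expand (a sketch): an
  // operand written as "-|v1|" contributes two MCOperands, first an immediate
  // holding the folded source modifiers (neg/abs or sext) and then the
  // register or literal itself, which is why they emit
  // Mods.getModifiersOperand() before delegating to addRegOperands() /
  // addImmOperands().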
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin)
      SgprIndexUnusedMin = ++i;
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin)
      VgprIndexUnusedMin = ++i;
  }

  void usesAgprAt(int i) {
    if (i >= AgprIndexUnusedMin)
      AgprIndexUnusedMin = ++i;
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    usesAgprAt(AgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};
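// Example of the bookkeeping above (informal): after parsing "v[4:7]" the
// parser calls usesRegister(IS_VGPR, 4, 128), i.e. DwordRegIndex 4 and a
// 128-bit width, so usesVgprAt(4 + 4 - 1) bumps VgprIndexUnusedMin to 8,
// recording that at least eight VGPRs are live in the current kernel scope.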
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseDirectiveAMDKernelCodeT();
  bool ParseDirectiveAMDGPUHsaKernel();
  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned &Num, unsigned &Width);
  unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum,
                         unsigned RegWidth, SMLoc Loc);

  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

    if (getFeatureBits().none()) {
      // No subtarget features were requested; defaults are selected here.
    }

    initializeGprCountSymbol(IS_VGPR);
    initializeGprCountSymbol(IS_SGPR);
  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }

  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                     SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands,
                                    StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);
  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix, OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t &) = nullptr);
  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();

  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr,
                     SMLoc Loc);

  bool parseCnt(int64_t &IntVal);
  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  bool parseDelay(int64_t &Delay);
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op,
                        OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands,
                  bool SearchMandatoryLiterals = false) const;
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateVOPDRegBankConstraints(const MCInst &Inst,
                                      const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
  std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
  AsmToken peekToken(bool ShouldSkipSpace = true);
  SMLoc getLoc() const;

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  int64_t parseGPRIdxMacro();
  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);
  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);

  bool parseDimId(unsigned &Encoding);

  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultDppBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands,
              bool IsDPP8 = false);

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;

  AMDGPUOperand::Ptr defaultWaitVDST() const;
  AMDGPUOperand::Ptr defaultWaitEXP() const;
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the target precision.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // Precision loss is allowed, but overflow or underflow is not.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16)
    return isInlinableIntLiteral(Val);

  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}
bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not).
    return false;
  }

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got an fp literal token.
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand.
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check whether the single-precision literal is inlinable.
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got an int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand.
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check whether this immediate can be encoded as a literal constant.
  if (!isImmTy(ImmTyNone))
    return false;

  if (type == MVT::f64 && hasFPModifiers()) {
    // Applying fp modifiers to an integer literal would change its meaning
    // between the VOP1/2/C and VOP3 encodings, so this case is rejected.
    return false;
  }
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
}

bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
}
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const {
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
    setImmKindNone();
  }
}
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals.
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double)
                                      : AMDGPU::getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got an fp literal token.
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      // Non-inlinable 64-bit fp literal.
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) {
        // Only the high 32 bits can be encoded; check that the low bits are zero.
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        setImmKindLiteral();
        return;
      }
    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32:
    case AMDGPU::OPERAND_KIMM32:
    case AMDGPU::OPERAND_KIMM16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's precision.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
        setImmKindMandatoryLiteral();
      } else {
        setImmKindLiteral();
      }
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }
  }

  // We got an int literal token.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));
    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }

  case AMDGPU::OPERAND_KIMM32:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
    setImmKindMandatoryLiteral();
    return;
  case AMDGPU::OPERAND_KIMM16:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
    setImmKindMandatoryLiteral();
    return;
  default:
    llvm_unreachable("invalid operand size");
  }
}
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);
  setImmKindMandatoryLiteral();

  if (!Imm.IsFPImm) {
    // We got an int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}

static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32: return AMDGPU::VGPR_32RegClassID;
    case 64: return AMDGPU::VReg_64RegClassID;
    case 96: return AMDGPU::VReg_96RegClassID;
    case 128: return AMDGPU::VReg_128RegClassID;
    case 160: return AMDGPU::VReg_160RegClassID;
    case 192: return AMDGPU::VReg_192RegClassID;
    case 224: return AMDGPU::VReg_224RegClassID;
    case 256: return AMDGPU::VReg_256RegClassID;
    case 288: return AMDGPU::VReg_288RegClassID;
    case 320: return AMDGPU::VReg_320RegClassID;
    case 352: return AMDGPU::VReg_352RegClassID;
    case 384: return AMDGPU::VReg_384RegClassID;
    case 512: return AMDGPU::VReg_512RegClassID;
    case 1024: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
    default: return -1;
    case 32: return AMDGPU::TTMP_32RegClassID;
    case 64: return AMDGPU::TTMP_64RegClassID;
    case 128: return AMDGPU::TTMP_128RegClassID;
    case 256: return AMDGPU::TTMP_256RegClassID;
    case 512: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32: return AMDGPU::SGPR_32RegClassID;
    case 64: return AMDGPU::SGPR_64RegClassID;
    case 96: return AMDGPU::SGPR_96RegClassID;
    case 128: return AMDGPU::SGPR_128RegClassID;
    case 160: return AMDGPU::SGPR_160RegClassID;
    case 192: return AMDGPU::SGPR_192RegClassID;
    case 224: return AMDGPU::SGPR_224RegClassID;
    case 256: return AMDGPU::SGPR_256RegClassID;
    case 288: return AMDGPU::SGPR_288RegClassID;
    case 320: return AMDGPU::SGPR_320RegClassID;
    case 352: return AMDGPU::SGPR_352RegClassID;
    case 384: return AMDGPU::SGPR_384RegClassID;
    case 512: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32: return AMDGPU::AGPR_32RegClassID;
    case 64: return AMDGPU::AReg_64RegClassID;
    case 96: return AMDGPU::AReg_96RegClassID;
    case 128: return AMDGPU::AReg_128RegClassID;
    case 160: return AMDGPU::AReg_160RegClassID;
    case 192: return AMDGPU::AReg_192RegClassID;
    case 224: return AMDGPU::AReg_224RegClassID;
    case 256: return AMDGPU::AReg_256RegClassID;
    case 288: return AMDGPU::AReg_288RegClassID;
    case 320: return AMDGPU::AReg_320RegClassID;
    case 352: return AMDGPU::AReg_352RegClassID;
    case 384: return AMDGPU::AReg_384RegClassID;
    case 512: return AMDGPU::AReg_512RegClassID;
    case 1024: return AMDGPU::AReg_1024RegClassID;
    }
  }
  return -1;
}
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("pc", AMDGPU::PC_REG)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister);
}
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R)
    return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}

OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(MCRegister &RegNo,
                                                       SMLoc &StartLoc,
                                                       SMLoc &EndLoc) {
  bool Result =
      ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  if (PendingErrors)
    return MatchOperand_ParseFail;
  if (Result)
    return MatchOperand_NoMatch;
  return MatchOperand_Success;
}
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind,
                                            unsigned Reg1, SMLoc Loc) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC; RegWidth = 64; return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR; RegWidth = 64; return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK; RegWidth = 64; return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC; RegWidth = 64; return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA; RegWidth = 64; return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA; RegWidth = 64; return true;
    }
    Error(Loc, "register does not fit in the list");
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth / 32) {
      Error(Loc, "registers in a list must have consecutive indices");
      return false;
    }
    RegWidth += 32;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}
static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};

static bool isRegularReg(RegisterKind Kind) {
  return Kind == IS_VGPR || Kind == IS_SGPR || Kind == IS_TTMP ||
         Kind == IS_AGPR;
}

static const RegInfo *getRegularRegInfo(StringRef Str) {
  for (const RegInfo &Reg : RegularRegisters)
    if (Str.startswith(Reg.Name))
      return &Reg;
  return nullptr;
}

static bool getRegNum(StringRef Str, unsigned &Num) {
  return !Str.getAsInteger(10, Num);
}

bool AMDGPUAsmParser::isRegister(const AsmToken &Token,
                                 const AsmToken &NextToken) const {
  // A regular register name is a known prefix ("v", "s", "ttmp", ...) followed
  // by a decimal index or by a '[lo:hi]' range; special registers are matched
  // by name.
  // ...
    if (!RegSuffix.empty()) {
      // A single register with an index: vNN, sNN, ...
      // ...
    }
  // ...
}

bool AMDGPUAsmParser::isRegister() {
  return isRegister(getToken(), peekToken());
}
unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
                                        unsigned RegWidth, SMLoc Loc) {
  assert(isRegularReg(RegKind));

  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    // SGPR and TTMP tuples must be aligned; the maximum required alignment
    // is four dwords.
    AlignSize = std::min(RegWidth / 32, 4u);
  }

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;
  }

  unsigned RegIdx = RegNum / AlignSize;
  int RCID = getRegClass(RegKind, RegWidth);
  if (RCID == -1) {
    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCRegisterClass RC = TRI->getRegClass(RCID);
  if (RegIdx >= RC.getNumRegs()) {
    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
  }

  return RC.getRegister(RegIdx);
}
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
  int64_t RegLo, RegHi;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(RegLo))
    return false;

  if (trySkipToken(AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
    return false;
  }

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");
    return false;
  }

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);
  return true;
}
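// For example, "s[2:5]" parses with RegLo = 2 and RegHi = 5, giving Num = 2
// and RegWidth = 32 * 4 = 128, i.e. a four-dword SGPR tuple starting at s2.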
unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  unsigned Reg = getSpecialRegForName(getTokenStr());
  if (Reg) {
    RegNum = 0;
    RegWidth = 32;
    RegKind = IS_SPECIAL;
    Tokens.push_back(getToken());
    lex(); // skip the register name
  }
  return Reg;
}

unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI) {
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;
  }

  Tokens.push_back(getToken());
  lex(); // skip the register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(RI->Name.size());
  if (!RegSuffix.empty()) {
    // Single 32-bit register: vNN.
    if (!getRegNum(RegSuffix, RegNum)) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    }
    RegWidth = 32;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;
  }

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
}
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                                       unsigned &RegWidth,
                                       SmallVectorImpl<AsmToken> &Tokens) {
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

  if (!skipToken(AsmToken::LBrac,
                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;
  }

  // A list of consecutive registers, e.g. [s0,s1,s2,s3].

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;
  }

  for (; trySkipToken(AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
                             Tokens)) {
      return AMDGPU::NoRegister;
    }
    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    }
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    }
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;
  }

  if (!skipToken(AsmToken::RBrac,
                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;
  }

  if (isRegularReg(RegKind))
    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);

  return Reg;
}
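// Example of the list form handled above (informal): "[s0, s1, s2, s3]" is
// parsed as four consecutive 32-bit SGPRs and folded into the same 128-bit
// tuple that "s[0:3]" would produce; mixing kinds such as "[s0, v1]" or
// skipping an index is rejected with the errors emitted above.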
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

  if (isToken(AsmToken::Identifier)) {
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (Reg == AMDGPU::NoRegister) {
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, "register not available on this GPU");
    }
    return false;
  }

  return true;
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure /*=false*/) {
  Reg = AMDGPU::NoRegister;

  SmallVector<AsmToken, 1> Tokens;
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens))
    return true;

  if (RestoreOnFailure) {
    while (!Tokens.empty()) {
      getLexer().UnLex(Tokens.pop_back_val());
    }
  }
  return false;
}
std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return std::nullopt;
  }
}

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets.
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
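// Roughly, the ".amdgcn.next_free_vgpr" / ".amdgcn.next_free_sgpr" symbols
// maintained above let assembly code and the .amdhsa_* directives observe how
// many GPRs the parsed code has touched so far; e.g. after a register operand
// "v7" is parsed, the VGPR symbol is raised to at least 8.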
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  if (isHsaAbiVersion3AndAbove(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
OperandMatchResultTy AMDGPUAsmParser::parseImm(OperandVector &Operands,
                                               bool HasSP3AbsModifier) {
  const auto &Tok = getToken();
  const auto &NextTok = peekToken();
  SMLoc S = getLoc();
  bool Negate = false;
  // ...

  if (Tok.is(AsmToken::Real)) {
    // Floating-point expressions are not supported; only fp literals with an
    // optional sign are accepted here.
    StringRef Num = getTokenStr();
    lex();

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
      return MatchOperand_ParseFail;
    if (Negate)
      RealVal.changeSign();

    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));
    return MatchOperand_Success;
  } else {
    int64_t IntVal;
    const MCExpr *Expr;
    SMLoc EndLoc;

    if (HasSP3AbsModifier) {
      // SP3 'abs' arguments like |1+x| are not ordinary MC expressions because
      // of the trailing '|', so only a primary expression is parsed here.
      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
        return MatchOperand_ParseFail;
    } else {
      if (Parser.parseExpression(Expr))
        return MatchOperand_ParseFail;
    }

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    } else {
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    }

    return MatchOperand_Success;
  }
}

OperandMatchResultTy AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
                                                    bool HasSP3AbsMod) {
  if (auto R = parseRegister()) {
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  } else if (isModifier()) {
    return MatchOperand_NoMatch;
  } else {
    return parseImm(Operands, HasSP3AbsMod);
  }
}
bool
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  StringRef str = Token.getString();
  return str == "abs" || str == "neg" || str == "sext";
}

bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                         const AsmToken &NextToken) const {
  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
}

bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                   const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}

bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}

bool
AMDGPUAsmParser::isModifier() {
  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}
bool
AMDGPUAsmParser::parseSP3NegModifier() {
  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1'; 'neg(-1)' should be used instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success)
    return (SP3Neg || Neg || SP3Abs || Abs) ? MatchOperand_ParseFail : Res;

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return MatchOperand_ParseFail;

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success)
    return Sext ? MatchOperand_ParseFail : Res;

  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Sext = Sext;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

OperandMatchResultTy
AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
    return MatchOperand_Success;
  }

  if (!isRegister())
    return MatchOperand_NoMatch;

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  return MatchOperand_ParseFail;
}
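// Informal examples of the two modifier syntaxes handled above: the SP3 style
// writes "-|v0|" (sign prefix plus vertical bars), while the named style
// writes "neg(abs(v0))" or "sext(v0)". Combining the two styles on one operand
// is rejected with "expected register or immediate", and "--1" is rejected in
// favour of "neg(-1)".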
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
    return Match_InvalidOperand;

  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/f16 allow only dst_sel == DWORD.
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA,    AMDGPUAsmVariants::SDWA9,
    AMDGPUAsmVariants::DPP,     AMDGPUAsmVariants::VOP3_DPP
  };

  // A forced encoding narrows the list down to the matching variant(s).
  if (isForcedDPP() && isForcedVOP3()) {
    // ...
  }
  if (getForcedEncodingSize() == 32) {
    // ...
  }
  if (isForcedVOP3()) {
    // ...
  }
  if (isForcedSDWA()) {
    // ...
  }
  if (isForcedDPP()) {
    // ...
  }
  return ArrayRef(Variants);
}

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
    return "e64_dpp";

  if (getForcedEncodingSize() == 32)
    return "e32";
  // ...
}
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (MCPhysReg Reg : Desc.implicit_uses()) {
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
      AMDGPU::isKImmOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size in bytes
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
      return AMDGPU::isInlinableIntLiteral(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
      return AMDGPU::isInlinableIntLiteralV216(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());

    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
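
// getConstantBusLimit: pre-GFX10 targets allow a single constant-bus read
// (one SGPR or literal) per instruction; GFX10+ generally allows two, but the
// 64-bit shift opcodes listed below remain limited to one scalar input.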
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input.
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}

// (fragment of a static helper that collects source-operand indices; its
//  declaration is only partially present in this listing)
                          bool AddMandatoryLiterals = false) {
  // ...
  int16_t ImmDeferredIdx =
      // ...
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    // ...
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
  }
  // ...
}
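
// validateConstantBusLimitations: counts SGPRs (including implicit reads such
// as VCC or FLAT_SCR) and unique literal values used by the instruction and
// reports an error when the total exceeds getConstantBusLimit().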
bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  // ...
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
    // ...
  }

  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }

  for (int OpIdx : OpIndices) {
    // ...
    if (usesConstantBus(Inst, OpIdx)) {
      // ...
      if (SGPRsUsed.insert(LastSGPR).second) {
        ++ConstantBusUseCount;
      }
      // ...
      unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
      // ...
      if (NumLiterals == 0) {
        // ...
      } else if (LiteralSize != Size) {
        // ...
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  // ...
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  return false;
}
bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  // ...
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    // ...
  };

  // ...
  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
  if (!InvalidCompOprIdx)
    return true;

  auto CompOprIdx = *InvalidCompOprIdx;
  auto ParsedIdx =
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  // ...
  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    Error(Loc, "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    Error(Loc, Twine("src") + Twine(CompSrcIdx) +
                   " operands must use different VGPR banks");
  }

  return false;
}

bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
  // ...
  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
  // ...
}
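
// validateMIMGDataSize: the width of the vdata register tuple must match the
// number of dmask bits set, plus one extra register when tfe is enabled;
// packed d16 halves the required data size.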
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  // ...
  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
  // ...
  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  // ...
  bool IsPackedD16 = false;
  // ...
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    IsPackedD16 = D16Idx >= 0;
    // ...
      DataSize = (DataSize + 1) / 2;
  // ...
  if ((VDataSize / 4) == DataSize + TFESize)
    return true;

  StringRef Modifiers;
  // ...
    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
  // ...
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
  return false;
}
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
  // ...
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
  // ...
  assert(SrsrcIdx > VAddr0Idx);
  // ...
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
  // ...
  unsigned ExpectedAddrSize =
      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
  // ...
    if (ExpectedAddrSize > 12)
      ExpectedAddrSize = 16;
    // ...
    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
      return true;
  // ...
  return ActualAddrSize == ExpectedAddrSize;
}
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
  // ...
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  // ...
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  // ...
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  // ...
  // Gather4 instructions require exactly one dmask bit to be set.
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}

bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  // ...
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  if (!BaseOpcode->MSAA)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  // ...
  return DimInfo->MSAA;
}
  // (case-list fragment: SDWA v_movrels / v_movrelsd opcodes)
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
    // ...

bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  // ...
  Error(ErrLoc, "source operand must be a VGPR");
  return false;
}

bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
                                          const OperandVector &Operands) {
  // ...
  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  // ...
        "source operand must be either a VGPR or an inline constant");
  // ...
}

bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  if (/* ... */
      !getFeatureBits()[FeatureMFMAInlineLiteralBug])
    return true;
  // ...
  if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
    Error(/* ... */, "inline constants are not allowed for this operand");
    return false;
  }
  return true;
}

bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
  // ...
  if (Src2Reg == DstReg)
    return true;

  if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
    return true;

  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    Error(/* ... */, "source 2 operand must not partially overlap with dst");
    return false;
  }
  return true;
}

bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  // ...
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:
    // ...
  }

  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src2_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
  // ...
  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  // ...
}
// (case list of reverse-operand "*rev*" opcodes; the enclosing helper's
//  header is missing from this listing)
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  // ...
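
// validateLdsDirect: returns a diagnostic when lds_direct is used on a GPU or
// instruction that does not support it, or in a source slot other than src0.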
std::optional<StringRef>
AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  // ...
  if ((Desc.TSFlags & Enc) == 0)
    return std::nullopt;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    // ...
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
      // ...
        return StringRef("lds_direct is not supported on this GPU");
      // ...
        return StringRef("lds_direct cannot be used with this instruction");
      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return std::nullopt;
}

// (helper returning the source location of the flat 'offset' operand; the
//  function header is not present in this listing)
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();
  }
  // ...
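
// validateFlatOffset: rejects a non-zero flat offset on GPUs without flat
// offsets and checks that the immediate fits the target's offset width
// (signed or unsigned depending on the instruction).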
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  // ...
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(/* ... */,
          "flat offset modifier is not supported on this GPU");
    return false;
  }

  // ...
  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
  bool AllowNegative =
      // ...
  if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
    Error(/* ... */,
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    return false;
  }

  return true;
}

// (helper returning the source location of the SMEM 'offset' operand; the
//  function header is not present in this listing)
  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset())
      return Op.getStartLoc();
  }
  // ...
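
// validateSMEMOffset: SMEM offsets must fit the encoding for the target:
// 20-bit unsigned on VI and for buffer forms, otherwise 21-bit signed.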
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  // ...
  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
      AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
    return true;

  Error(/* ... */,
        (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                             : "expected a 21-bit signed offset");
  return false;
}

bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  // ...
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);

  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // ...
  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      break;
    // ...
    if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
      if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
        // ...
        if (NumLiterals == 0 || LiteralValue != Value) {
          // ...
        }
      } else if (MO.isExpr()) {
        // ...
      }
    }
  }

  return NumLiterals + NumExprs <= 1;
}
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  // (separate branches handle different instruction families; only fragments
  //  of each survive in this listing)
  // ...
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  // ...
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    // ...
  }
  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // ...
  }
  // ...
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  // ...
}

bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  const OperandVector &Operands) {
  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
  // ...
  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
    // ...
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    // ...
    Error(S, "64 bit dpp only supports row_newbcast");
    return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
         (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}
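
// validateVOPLiteral: VOP instructions may carry at most one unique 32-bit
// literal, and only on targets with the VOP3Literal feature (or where the
// encoding mandates a literal).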
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  if (/* ... */ !HasMandatoryLiteral && !isVOPD(Opcode))
    return true;

  // ...
  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  // ...
  for (int OpIdx : OpIndices) {
    // ...
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      // ...
      if (NumLiterals == 0 || LiteralValue != Value) {
        // ...
      }
    } else if (MO.isExpr()) {
      // ...
    }
  }
  NumLiterals += NumExprs;

  // ...
  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
    return false;
  }

  return true;
}

// (helper that tests whether a named operand is an AGPR; the function header
//  is not present in this listing)
  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
  // ...
  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  // ...
  return AGPR32.contains(Reg) ? 1 : 0;
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  // ...
                                        : AMDGPU::OpName::vdata;
  // ...
  if (Data2Areg >= 0 && Data2Areg != DataAreg)
    return false;

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  return DstAreg < 1 && DataAreg < 1;
}

bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureGFX90AInsts])
    return true;
  // ...
    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
    // ...
    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      return false;
    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      return false;
  // ...
}

// (operand-location helper; its header and match condition are missing from
//  this listing)
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    // ...
      return Op.getStartLoc();
  }
  // ...

bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
                                   const OperandVector &Operands) {
  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  // ...
  auto FB = getFeatureBits();
  bool UsesNeg = false;
  if (FB[AMDGPU::FeatureGFX940Insts]) {
    switch (Opc) {
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
      UsesNeg = true;
    }
  }

  if (IsNeg == UsesNeg)
    return true;

  Error(/* ... */,
        UsesNeg ? "invalid modifier: blgp is not supported"
                : "invalid modifier: neg is not supported");
  return false;
}
bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
    return true;

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
  // ...
  if (Reg == AMDGPU::SGPR_NULL)
    return true;

  // ...
  Error(RegLoc, "src0 must be null");
  return false;
}

bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
                                  const OperandVector &Operands) {
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
    return true;
  // ...
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
    return true;

  // ...
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
  // ...
  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  if (RegIdx & 1) {
    // ...
    Error(RegLoc, "vgpr must be even aligned");
    return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  // ...
    Error(S, "cache policy is not supported for SMRD instructions");
  // ...
    Error(IDLoc, "invalid cache policy for SMEM instruction");
  // ...
    Error(S, "scc is not supported on this GPU");
  // ...
          : "instruction must use glc");
  // ...
        &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
  // ...
          : "instruction must not use glc");
  // ...
}

bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
  // ...
    if (!Operand->isReg())
      continue;
    unsigned Reg = Operand->getReg();
    if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
      Error(/* ... */,
            "execz and vccz are not supported on this GPU");
      return false;
    }
  // ...
  return true;
}

bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
                                  const OperandVector &Operands) {
  // ...
    Error(Loc, "TFE modifier has no meaning for store instructions");
    return false;
  // ...
}
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc,
                                          const OperandVector &Operands) {
  if (auto ErrMsg = validateLdsDirect(Inst)) {
    Error(/* ... */, *ErrMsg);
    return false;
  }
  if (!validateSOPLiteral(Inst)) {
    Error(/* ... */,
          "only one unique literal operand is allowed");
    return false;
  }
  if (!validateVOPLiteral(Inst, Operands)) {
    return false;
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    return false;
  }
  if (!validateVOPDRegBankConstraints(Inst, Operands)) {
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(/* ... */,
          "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(/* ... */,
          "invalid op_sel operand");
    return false;
  }
  if (!validateDPP(Inst, Operands)) {
    return false;
  }
  if (!validateMIMGD16(Inst)) {
    Error(/* ... */,
          "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(/* ... */,
          "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst, IDLoc)) {
    return false;
  }
  if (!validateMIMGAddrSize(Inst)) {
    Error(/* ... */,
          "image address size does not match dim and a16");
    return false;
  }
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(/* ... */,
          "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(/* ... */,
          "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  if (!validateMovrels(Inst, Operands)) {
    return false;
  }
  if (!validateFlatOffset(Inst, Operands)) {
    return false;
  }
  if (!validateSMEMOffset(Inst, Operands)) {
    return false;
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    return false;
  }
  if (!validateMAISrc2(Inst, Operands)) {
    return false;
  }
  if (!validateMFMA(Inst, Operands)) {
    return false;
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    return false;
  }
  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
                     ? "invalid register class: data and dst should be all VGPR or AGPR"
                     : "invalid register class: agpr loads and stores not supported on this GPU");
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(/* ... */,
          "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  if (!validateGWS(Inst, Operands)) {
    return false;
  }
  if (!validateBLGP(Inst, Operands)) {
    return false;
  }
  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    return false;
  }
  if (!validateWaitCnt(Inst, Operands)) {
    return false;
  }
  if (!validateExeczVcczOperands(Operands)) {
    return false;
  }
  if (!validateTFE(Inst, Operands)) {
    return false;
  }

  return true;
}
// (forward declarations of generated matcher helpers; only the trailing
//  parameter of each survives in this listing)
                                      unsigned VariantID = 0);
                                      unsigned VariantID);

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  // ...
}

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS,
                                       ArrayRef<unsigned> Variants) {
  for (auto Variant : Variants) {
    // ...
  }
  return false;
}

bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());

  // Check if the requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // The instruction is not supported: clear any other pending errors before
  // emitting a more specific diagnostic.
  getParser().clearPendingErrors();

  // The requested variant is not supported; check whether another variant is.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 (Twine(VariantName) +
                  " variant of this instruction is not supported"));
  }

  // Check whether the instruction would be available with wavesize=32.
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
    FeatureBitset FeaturesWS32 = getFeatureBits();
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    FeatureBitset AvailableFeaturesWS32 =
        ComputeAvailableFeatures(FeaturesWS32);

    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  }

  // ...
    return Error(IDLoc, "instruction not supported on this GPU");

  // ...
  return Error(IDLoc, "invalid instruction" + Suggestion);
}

// (helper that checks whether the operand at InvalidOprIdx follows a "::"
//  separator, used for the VOPD-related diagnostic below; header not present
//  in this listing)
  const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
  if (Op.isToken() && InvalidOprIdx > 1) {
    const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
    return PrevOp.isToken() && PrevOp.getToken() == "::";
  }
  // ...
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // Keep the most specific result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature <
    //     Match_PreferE32.
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    // ... (emit the matched instruction)
    return false;
  }

  // ...
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  // ...
  case Match_MissingFeature:
    // ...
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    // ...
      return Error(IDLoc, "too few operands for instruction");
    // ...
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    // ...
      return Error(ErrorLoc, "invalid VOPDY instruction");
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t Tmp = -1;
  // ...
  if (getParser().parseAbsoluteExpression(Tmp)) {
    return true;
  }
  Ret = static_cast<uint32_t>(Tmp);
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  if (/* ... */)
    return TokError("minor version number required, comma expected");

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}

bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    return true;

  // ...
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(
        TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString()))
            .str());

  return false;
}

bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
    unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
    SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // ...
  unsigned MaxAddressableNumSGPRs =
      // ...

  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  // ...
  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (Features.test(FeatureSGPRInitBug))
    // ...

  // ...
  return false;
}
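
// ParseDirectiveAMDHSAKernel: parses the .amdhsa_kernel block, translating
// each .amdhsa_* sub-directive into the corresponding kernel-descriptor field
// (via PARSE_BITS_ENTRY), cross-checking directive/target compatibility, then
// computing the granulated GPR counts and emitting the descriptor.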
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  // ...
    return TokError("directive only supported for amdgcn architecture");
  // ...
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;
  // ... (kernel descriptor, ISA version and GPR-tracking locals elided) ...

  // Count the number of user SGPRs implied by the enabled feature directives.
  unsigned ImpliedUserSGPRCount = 0;

  // Track whether the user SGPR count was given explicitly.
  std::optional<unsigned> ExplicitUserSGPRCount;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  std::optional<bool> EnableWavefrontSize32;

  for (;;) {
    // ...
    StringRef ID;
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      return true;

    if (ID == ".end_amdhsa_kernel")
      break;

    if (!Seen.insert(ID).second)
      return TokError(".amdhsa_ directives cannot be repeated");

    SMLoc ValStart = getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getLoc();
    SMRange ValRange(ValStart, ValEnd);
    // ...
      return OutOfRangeError(ValRange);
    uint64_t Val = IVal;

#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      // ...
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      // ...
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_kernarg_size") {
      // ...
        return OutOfRangeError(ValRange);
      KD.kernarg_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_count") {
      ExplicitUserSGPRCount = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      EnableWavefrontSize32 = Val;
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                       Val, ValRange);
    } else if (ID == ".amdhsa_uses_dynamic_stack") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
    } else if (ID == ".amdhsa_enable_private_segment") {
      if (!hasArchitectedFlatScratch())
        return Error(
            IDRange.Start,
            "directive is not supported without architected flat scratch",
            IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_accum_offset") {
      if (/* ... */)
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(
            IDRange.Start,
            ".amdhsa_reserve_xnack_mask does not match target id", IDRange);
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      // ...
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      // ...
    } else if (ID == ".amdhsa_tg_split") {
      if (/* ... */)
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      // ...
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // ...
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // ...
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // ...
    } else if (ID == ".amdhsa_shared_vgpr_count") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      SharedVGPRCount = Val;
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
                       COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdgpu_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");

  unsigned UserSGPRCount =
      ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  // ...

  if (isGFX90A()) {
    if (Seen.find(".amdhsa_accum_offset") == Seen.end())
      return TokError(".amdhsa_accum_offset directive is required");
    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
      return TokError("accum_offset should be in range [4..256] in "
                      "increments of 4");
    if (/* ... */)
      return TokError("accum_offset exceeds total VGPR allocation");
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
                    COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
                    (AccumOffset / 4 - 1));
  }

  if (IVersion.Major == 10) {
    // ...
    if (SharedVGPRCount && EnableWavefrontSize32) {
      return TokError("shared_vgpr_count directive not valid on "
                      "wavefront size 32");
    }
    if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      "exceed 63");
    }
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr);
  return false;
}