enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  bool hasFPModifiers() const { return Abs || Neg; }
  bool hasIntModifiers() const { return Sext; }
  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

  int64_t getFPModifiersOperand() const {

  int64_t getIntModifiersOperand() const {

  int64_t getModifiersOperand() const {
    assert(!(hasFPModifiers() && hasIntModifiers()) &&
           "fp and int modifiers should not be used simultaneously");
    if (hasFPModifiers()) {
      return getFPModifiersOperand();
    } else if (hasIntModifiers()) {
      return getIntModifiersOperand();
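  // Added note: in the assembler syntax, abs()/neg() (or the SP3 spellings
  // |x| and -x) are the floating-point input modifiers and sext() is the
  // integer input modifier; the assert above enforces that a single operand
  // never carries both kinds at once.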
  ImmKindTyMandatoryLiteral,

  mutable ImmKindTy Kind;

  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;
  bool isRegKind() const {

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

    return isRegOrInline(RCID, type) || isLiteralImm(type);

  bool isRegOrImmWithInt16InputMods() const {

  bool isRegOrImmWithIntT16InputMods() const {

  bool isRegOrImmWithInt32InputMods() const {

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {

  bool isRegOrImmWithFP16InputMods() const {

  bool isRegOrImmWithFPT16InputMods() const {

  bool isRegOrImmWithFP32InputMods() const {

  bool isRegOrImmWithFP64InputMods() const {

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }
  bool isVRegWithInputMods() const;
  bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isA16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }
  bool isRegOrImm() const {

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {

  bool isSSrcB64() const {
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {

  bool isSSrcV2FP32() const {

  bool isSCSrcV2FP32() const {

  bool isSSrcV2INT32() const {

  bool isSCSrcV2INT32() const {

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcTB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }

  bool isVSrcTB16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16B16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }

  bool isVSrcTF16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16F16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {

  bool isExpr() const {

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;
  int64_t getImm() const {

  void setImm(int64_t Val) {

  ImmTy getImmTy() const {

  unsigned getReg() const override {

    return SMRange(StartLoc, EndLoc);

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }
  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
      addRegOperands(Inst, N);
      addImmOperands(Inst, N);

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
      addRegOperands(Inst, N);
      addImmOperands(Inst, N, false);

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    addRegOperands(Inst, N);

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';

    OS << '<' << getImm();
    if (getImmTy() != ImmTyNone) {
      OS << " type: "; printImmTy(OS, getImmTy());

    OS << " mods: " << Imm.Mods << '>';

    OS << '\'' << getToken() << '\'';

    OS << "<expr " << *Expr << '>';
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);

  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
                         VgprIndexUnusedMin);

  void usesAgprAt(int i) {
    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
                         VgprIndexUnusedMin);

  KernelScopeInfo() = default;

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    usesAgprAt(AgprIndexUnusedMin = -1);

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
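// Added illustration (separate stand-alone program, not part of AMDGPUAsmParser.cpp):
// the "first unused index" bookkeeping above in isolation. A use of s[2:3]
// (DwordRegIndex = 2, RegWidth = 64) touches dword index 2 + ceil(64/32) - 1 = 3,
// so the first unused SGPR index becomes 4. divideCeilU stands in for llvm::divideCeil.
#include <cassert>

static int SgprIndexUnusedMin = -1;

static void usesSgprAt(int i) {
  if (i >= SgprIndexUnusedMin)
    SgprIndexUnusedMin = ++i;
}

static unsigned divideCeilU(unsigned Num, unsigned Den) { return (Num + Den - 1) / Den; }

int main() {
  unsigned DwordRegIndex = 2, RegWidth = 64;               // models a use of s[2:3]
  usesSgprAt(DwordRegIndex + divideCeilU(RegWidth, 32) - 1);
  assert(SgprIndexUnusedMin == 4);                          // next free SGPR index
  return 0;
}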
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);

  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseDirectiveAMDKernelCodeT();
  bool ParseDirectiveAMDGPUHsaKernel();
  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
  bool ParseRegRange(unsigned &Num, unsigned &Width);
  unsigned getRegularReg(RegisterKind RegKind,
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,

  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY

    OperandMode_Default,

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
    if (getFeatureBits().none()) {

      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];

  bool hasSGPR102_SGPR103() const {

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }

  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
                     bool RestoreOnFailure);
                     SMLoc &EndLoc) override;
                     unsigned Kind) override;
                     bool MatchingInlineAsm) override;
                     OperandMode Mode = OperandMode_Default);
  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     std::function<bool(int64_t &)> ConvertResult = nullptr);
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);

  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
                     bool AllowImm = true);
                     bool AllowImm = true);

  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  bool parseCnt(int64_t &IntVal);
  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  bool parseDelay(int64_t &Delay);

  struct OperandInfoTy {
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      bool SearchMandatoryLiterals = false) const;

  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateVOPDRegBankConstraints(const MCInst &Inst,
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
                        const SMLoc &IDLoc);
  std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  AsmToken peekToken(bool ShouldSkipSpace = true);
  SMLoc getLoc() const;

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
  bool parseSwizzleOperands(const unsigned OpNum, int64_t *Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  int64_t parseGPRIdxMacro();

               OptionalImmIndexMap &OptionalIdx);
               OptionalImmIndexMap &OptionalIdx);
               OptionalImmIndexMap &OptionalIdx);

  bool parseDimId(unsigned &Encoding);
  bool convertDppBoundCtrl(int64_t &BoundCtrl);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
               bool IsDPP8 = false);
               AMDGPUOperand::ImmTy Type);
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();

    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();

                                 APFloat::rmNearestTiesToEven,
  if (Status != APFloat::opOK &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  if (!isImmTy(ImmTyNone)) {

    if (type == MVT::f64 || type == MVT::i64) {
                                          AsmParser->hasInv2PiInlineImm());

          static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
          type, AsmParser->hasInv2PiInlineImm());

          static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
          AsmParser->hasInv2PiInlineImm());

  if (type == MVT::f64 || type == MVT::i64) {
                                        AsmParser->hasInv2PiInlineImm());

        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        type, AsmParser->hasInv2PiInlineImm());

        static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
        AsmParser->hasInv2PiInlineImm());

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {

  if (type == MVT::f64 && hasFPModifiers()) {

  if (type == MVT::f64) {

  if (type == MVT::i64) {

  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 :
                     (type == MVT::v2i16) ? MVT::i16 :
                     (type == MVT::v2f32) ? MVT::f32 : type;
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}

bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
  else if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
}
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
    addLiteralImmOperand(Inst, Imm.Val,
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
    assert(!isImmTy(ImmTyNone) || !hasModifiers());

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());

  if (ApplyModifiers) {
    Val = applyInputFPModifiers(Val, Size);

  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

                               AsmParser->hasInv2PiInlineImm())) {

      if (Literal.getLoBits(32) != 0) {
        const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");

      setImmKindLiteral();

                         APFloat::rmNearestTiesToEven, &lost);

    uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();

      setImmKindMandatoryLiteral();
      setImmKindLiteral();

                               AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();
      setImmKindLiteral();

                               AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();

                             AsmParser->hasInv2PiInlineImm()));
    setImmKindMandatoryLiteral();
    setImmKindMandatoryLiteral();
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {

bool AMDGPUOperand::isInlineValue() const {

  if (Is == IS_VGPR) {
      return AMDGPU::VGPR_32RegClassID;
      return AMDGPU::VReg_64RegClassID;
      return AMDGPU::VReg_96RegClassID;
      return AMDGPU::VReg_128RegClassID;
      return AMDGPU::VReg_160RegClassID;
      return AMDGPU::VReg_192RegClassID;
      return AMDGPU::VReg_224RegClassID;
      return AMDGPU::VReg_256RegClassID;
      return AMDGPU::VReg_288RegClassID;
      return AMDGPU::VReg_320RegClassID;
      return AMDGPU::VReg_352RegClassID;
      return AMDGPU::VReg_384RegClassID;
      return AMDGPU::VReg_512RegClassID;
      return AMDGPU::VReg_1024RegClassID;
  } else if (Is == IS_TTMP) {
      return AMDGPU::TTMP_32RegClassID;
      return AMDGPU::TTMP_64RegClassID;
      return AMDGPU::TTMP_128RegClassID;
      return AMDGPU::TTMP_256RegClassID;
      return AMDGPU::TTMP_512RegClassID;
  } else if (Is == IS_SGPR) {
      return AMDGPU::SGPR_32RegClassID;
      return AMDGPU::SGPR_64RegClassID;
      return AMDGPU::SGPR_96RegClassID;
      return AMDGPU::SGPR_128RegClassID;
      return AMDGPU::SGPR_160RegClassID;
      return AMDGPU::SGPR_192RegClassID;
      return AMDGPU::SGPR_224RegClassID;
      return AMDGPU::SGPR_256RegClassID;
      return AMDGPU::SGPR_288RegClassID;
      return AMDGPU::SGPR_320RegClassID;
      return AMDGPU::SGPR_352RegClassID;
      return AMDGPU::SGPR_384RegClassID;
      return AMDGPU::SGPR_512RegClassID;
  } else if (Is == IS_AGPR) {
      return AMDGPU::AGPR_32RegClassID;
      return AMDGPU::AReg_64RegClassID;
      return AMDGPU::AReg_96RegClassID;
      return AMDGPU::AReg_128RegClassID;
      return AMDGPU::AReg_160RegClassID;
      return AMDGPU::AReg_192RegClassID;
      return AMDGPU::AReg_224RegClassID;
      return AMDGPU::AReg_256RegClassID;
      return AMDGPU::AReg_288RegClassID;
      return AMDGPU::AReg_320RegClassID;
      return AMDGPU::AReg_352RegClassID;
      return AMDGPU::AReg_384RegClassID;
      return AMDGPU::AReg_512RegClassID;
      return AMDGPU::AReg_1024RegClassID;
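// Added note: the case labels of this switch (elided above) select a register
// class by total width in bits, so a single VGPR maps to VGPR_32RegClassID,
// a two-dword range such as v[0:1] to VReg_64RegClassID, and so on up to the
// 1024-bit tuples; TTMP, SGPR and AGPR ranges follow the same pattern.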
      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R)
    return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();

  return ParseRegister(Reg, StartLoc, EndLoc, false);

  bool Result = ParseRegister(Reg, StartLoc, EndLoc, true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();

bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {

  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
    Reg = AMDGPU::FLAT_SCR;

  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
    Reg = AMDGPU::XNACK_MASK;

  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {

  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {

  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {

  Error(Loc, "register does not fit in the list");

  if (Reg1 != Reg + RegWidth / 32) {
    Error(Loc, "registers in a list must have consecutive indices");
  {{"ttmp"}, IS_TTMP},

  return Kind == IS_VGPR ||

  if (Str.startswith(Reg.Name))

  return !Str.getAsInteger(10, Num);

AMDGPUAsmParser::isRegister(const AsmToken &Token,

  if (!RegSuffix.empty()) {

AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());

AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,

  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    AlignSize = std::min(RegWidth / 32, 4u);

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;

  unsigned RegIdx = RegNum / AlignSize;

    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;

    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
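// Added illustration (separate stand-alone program, not part of AMDGPUAsmParser.cpp):
// the SGPR/TTMP alignment rule from getRegularReg above. A multi-dword range must
// start at an index that is a multiple of min(RegWidth/32, 4), so under this rule
// s[4:7] is accepted while s[5:8] triggers "invalid register alignment".
#include <algorithm>
#include <iostream>

static bool sgprRangeIsAligned(unsigned RegNum, unsigned RegWidth) {
  unsigned AlignSize = std::min(RegWidth / 32, 4u); // mirrors the IS_SGPR/IS_TTMP branch
  return RegNum % AlignSize == 0;
}

int main() {
  std::cout << sgprRangeIsAligned(4, 128) << '\n'; // 1: s[4:7]
  std::cout << sgprRangeIsAligned(5, 128) << '\n'; // 0: s[5:8]
  return 0;
}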
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
  int64_t RegLo, RegHi;

  SMLoc FirstIdxLoc = getLoc();
  if (!parseExpr(RegLo))

    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);
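// Added illustration (separate stand-alone program, not part of AMDGPUAsmParser.cpp):
// ParseRegRange above turns the bracketed indices of a range such as s[4:7] into a
// starting index and a width in bits: RegWidth = 32 * ((RegHi - RegLo) + 1).
#include <cassert>
#include <cstdint>

int main() {
  int64_t RegLo = 4, RegHi = 7;                    // written in source as s[4:7]
  unsigned Num = static_cast<unsigned>(RegLo);
  unsigned RegWidth = 32 * ((RegHi - RegLo) + 1);  // four consecutive 32-bit registers
  assert(Num == 4 && RegWidth == 128);
  return 0;
}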
unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
    RegKind = IS_SPECIAL;

unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
  auto Loc = getLoc();
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;

  if (!RegSuffix.empty()) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;

    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);

unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;

    auto Loc = getLoc();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
      return AMDGPU::NoRegister;
    if (RegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;

    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
      return AMDGPU::NoRegister;

    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;

    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;

    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;

                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;

    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);

  if (Reg == AMDGPU::NoRegister) {

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
      Error(Loc, "register not available on this GPU");

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure) {
  Reg = AMDGPU::NoRegister;

  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
    return StringRef(".amdgcn.next_free_vgpr");
    return StringRef(".amdgcn.next_free_sgpr");
    return std::nullopt;

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
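// Added note: NewMax computed above is the highest dword index touched by the
// register use; the .amdgcn.next_free_{v,s}gpr symbol is presumably bumped to
// NewMax + 1 whenever its current value OldCount is <= NewMax, so the counters
// only ever grow.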
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {

    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))

  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
                              bool HasSP3AbsModifier) {
  const auto &Tok = getToken();
  const auto &NextTok = peekToken();

  bool Negate = false;

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))

      RealVal.changeSign();

        AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));

    if (HasSP3AbsModifier) {

      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));

  if (auto R = parseRegister()) {

                                   bool HasSP3AbsMod) {
    return parseImm(Operands, HasSP3AbsMod);

AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return str == "abs" || str == "neg" || str == "sext";

AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                         const AsmToken &NextToken) const {

AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                   const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);

AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);

AMDGPUAsmParser::isModifier() {
  peekTokens(NextToken);
  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);

AMDGPUAsmParser::parseSP3NegModifier() {
  peekTokens(NextToken);
      (isRegister(NextToken[0], NextToken[1]) ||
       isId(NextToken[0], "abs"))) {

    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  Neg = trySkipId("neg");
    return Error(Loc, "expected register or immediate");

  Abs = trySkipId("abs");
    return Error(Loc, "expected register or immediate");

  Res = parseRegOrImm(Operands, SP3Abs);

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);

  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))

  AMDGPUOperand::Modifiers Mods;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);

  return parseRegOrImmWithFPInputMods(Operands, false);

  return parseRegOrImmWithIntInputMods(Operands, false);

  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
    Operands.push_back(std::move(Reg));
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
    return Match_InvalidOperand;

      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;

  return Match_Success;

  static const unsigned Variants[] = {

  if (isForcedDPP() && isForcedVOP3()) {
  if (getForcedEncodingSize() == 32) {
  if (isForcedVOP3()) {
  if (isForcedSDWA()) {
  if (isForcedDPP()) {

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())

  if (getForcedEncodingSize() == 32)

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  case AMDGPU::FLAT_SCR:
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  return AMDGPU::NoRegister;
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {

  int64_t Val = MO.getImm();

unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:

                             bool AddMandatoryLiterals = false) {

  int16_t ImmDeferredIdx =

bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;

bool AMDGPUAsmParser::validateConstantBusLimitations(
  const unsigned Opcode = Inst.getOpcode();
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (!(Desc.TSFlags &

    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;

    for (int OpIdx : OpIndices) {

      if (usesConstantBus(Inst, OpIdx)) {
          if (SGPRsUsed.insert(LastSGPR).second) {
            ++ConstantBusUseCount;

          if (NumLiterals == 0) {
          } else if (LiteralSize != Size) {

    ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))

  Error(Loc, "invalid operand (violates constant bus restrictions)");
bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
  const unsigned Opcode = Inst.getOpcode();

  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {

  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
  if (!InvalidCompOprIdx)

  auto CompOprIdx = *InvalidCompOprIdx;
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));

  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    Error(Loc, "one dst register must be even and the other odd");
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
          " operands must use different VGPR banks");

bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {

  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;

  bool IsPackedD16 = false;
    IsPackedD16 = D16Idx >= 0;
      DataSize = (DataSize + 1) / 2;

  if ((VDataSize / 4) == DataSize + TFESize)

    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3614bool AMDGPUAsmParser::validateMIMGAddrSize(
const MCInst &Inst,
3615 const SMLoc &IDLoc) {
3633 assert(SrsrcIdx > VAddr0Idx);
3636 if (BaseOpcode->
BVH) {
3637 if (IsA16 == BaseOpcode->
A16)
3639 Error(IDLoc,
"image address size does not match a16");
3645 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3646 unsigned ActualAddrSize =
3647 IsNSA ? SrsrcIdx - VAddr0Idx
3650 unsigned ExpectedAddrSize =
3654 if (hasPartialNSAEncoding() && ExpectedAddrSize >
getNSAMaxSize()) {
3655 int VAddrLastIdx = SrsrcIdx - 1;
3656 unsigned VAddrLastSize =
3659 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3662 if (ExpectedAddrSize > 12)
3663 ExpectedAddrSize = 16;
3668 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3672 if (ActualAddrSize == ExpectedAddrSize)
3675 Error(IDLoc,
"image address size does not match dim and a16");
3679bool AMDGPUAsmParser::validateMIMGAtomicDMask(
const MCInst &Inst) {
3686 if (!
Desc.mayLoad() || !
Desc.mayStore())
3696 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3699bool AMDGPUAsmParser::validateMIMGGatherDMask(
const MCInst &Inst) {
3715 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3718bool AMDGPUAsmParser::validateMIMGMSAA(
const MCInst &Inst) {
3729 if (!BaseOpcode->
MSAA)
3738 return DimInfo->
MSAA;
3744 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3745 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3746 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3756bool AMDGPUAsmParser::validateMovrels(
const MCInst &Inst,
3780 Error(ErrLoc,
"source operand must be a VGPR");
3784bool AMDGPUAsmParser::validateMAIAccWrite(
const MCInst &Inst,
3789 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3803 "source operand must be either a VGPR or an inline constant");
3810bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
3816 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3823 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
3825 "inline constants are not allowed for this operand");
3832bool AMDGPUAsmParser::validateMFMA(
const MCInst &Inst,
3850 if (Src2Reg == DstReg)
3854 if (
TRI->getRegClass(
Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3857 if (
TRI->regsOverlap(Src2Reg, DstReg)) {
3859 "source 2 operand must not partially overlap with dst");
bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  default:
    return true;
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:
    break;
  }
  // ...
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src2_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    // ...
  }

  return true;
}
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
  // ...
}
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
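// Descriptive comment (added): all opcodes listed above are the "rev" forms,
// where the roles of src0 and src1 are swapped relative to the non-rev
// instruction (e.g. v_subrev_f32 computes src1 - src0); the parser consults
// this predicate when validating operand placement for such instructions.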
std::optional<StringRef>
AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  // ...
  if ((Desc.TSFlags & Enc) == 0)
    return std::nullopt;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    // ...
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
      // ...
        return StringRef("lds_direct is not supported on this GPU");
      // ...
        return StringRef("lds_direct cannot be used with this instruction");

      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return std::nullopt;
}
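// Illustration (added, not from the original source): on GPUs that still
// support it, lds_direct may only feed the first source operand, e.g.
//   v_mov_b32 v0, lds_direct       // accepted (src0)
//   v_add_f32 v0, v1, lds_direct   // rejected: "may be used as src0 only"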
SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();
  }
  return getLoc();
}
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(getFlatOffsetLoc(Operands),
          "flat offset modifier is not supported on this GPU");
    return false;
  }
  // ...
  bool AllowNegative =
      (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch));
  if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
    Error(getFlatOffsetLoc(Operands),
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    return false;
  }

  return true;
}
SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
  // Start with the second operand: an SMEM offset cannot be dst or src0.
  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
      return Op.getStartLoc();
  }
  return getLoc();
}
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  Error(getSMEMOffsetLoc(Operands),
        (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                             : "expected a 21-bit signed offset");
  return false;
}
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  // ...
  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    } else if (MO.isExpr()) {
      ++NumExprs;
    }
  }

  return NumLiterals + NumExprs <= 1;
}
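// Descriptive note (added): SOP2/SOPC instructions have a single 32-bit
// literal slot, so e.g. "s_add_u32 s0, 0x12345678, 0x12345678" is fine (one
// unique literal used twice) while two different literals in the same
// instruction trip the "only one unique literal operand is allowed"
// diagnostic issued by the caller.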
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  // ...
  if (OpSelIdx != -1) {
    // ...
  }
  if (OpSelHiIdx != -1) {
    // ...
  }
  // ...
}
bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  const OperandVector &Operands) {
  // ...
  Error(S, "DP ALU dpp only supports row_newbcast");
  return false;
}
bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
         (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}
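// Illustration (added): in wave32 mode the carry/condition register is the
// 32-bit vcc_lo, so "v_cndmask_b32 v0, v1, v2, vcc_lo" is accepted while the
// 64-bit "vcc" form is only valid in wave64, and vice versa.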
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // ...
  if (/* ... */ !HasMandatoryLiteral && !isVOPD(Opcode))
    return true;
  // ...
  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    // ...
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    } else if (MO.isExpr()) {
      ++NumExprs;
    }
  }
  NumLiterals += NumExprs;

  if (!NumLiterals)
    return true;

  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands, true),
          "only one unique literal operand is allowed");
    return false;
  }

  return true;
}
  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  return AGPR32.contains(Reg) ? 1 : 0;
}
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  // ...
  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                      : AMDGPU::OpName::vdata;
  // ...
    if (Data2Areg >= 0 && Data2Areg != DataAreg)
      return false;
  // ...
  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  return DstAreg < 1 && DataAreg < 1;
}
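// Descriptive note (added): gfx90a can load/store directly to AGPRs, but dst
// and data must then agree (all VGPR or all AGPR); on older targets any AGPR
// data operand on a FLAT/MUBUF/MTBUF/DS access is rejected via the
// register-class diagnostics used by validateInstruction below.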
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureGFX90AInsts])
    return true;

  const MCRegisterInfo *MRI = getMRI();
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
    const MCOperand &Op = Inst.getOperand(I);
    if (!Op.isReg())
      continue;

    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
    if (!Sub)
      continue;

    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      return false;
    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      return false;
  }

  return true;
}
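// Illustration (added): on gfx90a, register tuples must start on an even
// register, so "global_load_dwordx2 v[2:3], ..." is fine while
// "global_load_dwordx2 v[3:4], ..." triggers "vgpr tuples must be 64 bit
// aligned".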
SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isBLGP())
      return Op.getStartLoc();
  }
  return SMLoc();
}
bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
                                   const OperandVector &Operands) {
  // ...
  auto FB = getFeatureBits();
  bool UsesNeg = false;
  if (FB[AMDGPU::FeatureGFX940Insts]) {
    switch (Inst.getOpcode()) {
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
      UsesNeg = true;
    }
  }

  if (IsNeg == UsesNeg)
    return true;

  Error(BLGPLoc,
        UsesNeg ? "invalid modifier: blgp is not supported"
                : "invalid modifier: neg is not supported");

  return false;
}
bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
                                      const OperandVector &Operands) {
  // ...
  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
    return true;
  // ...
  if (Reg == AMDGPU::SGPR_NULL)
    return true;

  Error(RegLoc, "src0 must be null");
  return false;
}
bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
                                 const OperandVector &Operands) {
  // ...
  if (TSFlags & SIInstrFlags::GWS)
    return validateGWS(Inst, Operands);
  // ...
  if (GDS) {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
    Error(S, "gds modifier is not supported on this GPU");
    return false;
  }
  return true;
}
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
                                  const OperandVector &Operands) {
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
    return true;

  int Opc = Inst.getOpcode();
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
    return true;
  // ...
  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  if (RegIdx & 1) {
    Error(RegLoc, "vgpr must be even aligned");
    return false;
  }

  return true;
}
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  // ...
    Error(S, "cache policy is not supported for SMRD instructions");
  // ...
    Error(IDLoc, "invalid cache policy for SMEM instruction");
  // ...
    if (!(TSFlags & AllowSCCModifier)) {
      // ...
      Error(S,
            "scc modifier is not supported for this instruction on this GPU");
      return false;
    }
  // ...
      Error(IDLoc, isGFX940() ? "instruction must use sc0"
                              : "instruction must use glc");
  // ...
      S = SMLoc::getFromPointer(
          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
      Error(S, isGFX940() ? "instruction must not use sc0"
                          : "instruction must not use glc");
  // ...
}
bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
  // ...
  for (auto &Operand : Operands) {
    if (!Operand->isReg())
      continue;
    unsigned Reg = Operand->getReg();
    if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
      Error(getRegLoc(Reg, Operands),
            "execz and vccz are not supported on this GPU");
      return false;
    }
  }
  return true;
}
bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
                                  const OperandVector &Operands) {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  if (Desc.mayStore() &&
      (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
    // ...
      Error(Loc, "TFE modifier has no meaning for store instructions");
      return false;
  }

  return true;
}
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc,
                                          const OperandVector &Operands) {
  if (auto ErrMsg = validateLdsDirect(Inst)) {
    Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
    return false;
  }
  if (!validateSOPLiteral(Inst)) {
    Error(getLitLoc(Operands),
          "only one unique literal operand is allowed");
    return false;
  }
  if (!validateVOPLiteral(Inst, Operands)) {
    return false;
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    return false;
  }
  if (!validateVOPDRegBankConstraints(Inst, Operands)) {
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
          "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "invalid op_sel operand");
    return false;
  }
  if (!validateDPP(Inst, Operands)) {
    return false;
  }
  if (!validateMIMGD16(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
          "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst, IDLoc)) {
    return false;
  }
  if (!validateMIMGAddrSize(Inst, IDLoc))
    return false;
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  if (!validateMovrels(Inst, Operands)) {
    return false;
  }
  if (!validateFlatOffset(Inst, Operands)) {
    return false;
  }
  if (!validateSMEMOffset(Inst, Operands)) {
    return false;
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    return false;
  }
  if (!validateMAISrc2(Inst, Operands)) {
    return false;
  }
  if (!validateMFMA(Inst, Operands)) {
    return false;
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    return false;
  }
  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
        ? "invalid register class: data and dst should be all VGPR or AGPR"
        : "invalid register class: agpr loads and stores not supported on this GPU"
    );
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(IDLoc,
          "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  // ...
  if (!validateBLGP(Inst, Operands)) {
    return false;
  }
  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    return false;
  }
  if (!validateWaitCnt(Inst, Operands)) {
    return false;
  }
  if (!validateExeczVcczOperands(Operands)) {
    return false;
  }
  if (!validateTFE(Inst, Operands)) {
    return false;
  }

  return true;
}
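// Descriptive note (added): validateInstruction is the single funnel run on
// every successfully matched MCInst before emission; each helper either
// reports its own diagnostic at the offending operand or returns false and
// lets this routine attach a generic message at the instruction location
// (IDLoc).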
static std::string AMDGPUMnemonicSpellCheck(StringRef S,
                                            const FeatureBitset &FBS,
                                            unsigned VariantID = 0);

static bool AMDGPUCheckMnemonic(StringRef Mnemo,
                                const FeatureBitset &AvailableFeatures,
                                unsigned VariantID);
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS,
                                       ArrayRef<unsigned> Variants) {
  for (auto Variant : Variants) {
    if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
      return true;
  }

  return false;
}
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
  // Check if any other variants are supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Check if this instruction may be used with a different wavesize.
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {

    FeatureBitset FeaturesWS32 = getFeatureBits();
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    FeatureBitset AvailableFeaturesWS32 =
        ComputeAvailableFeatures(FeaturesWS32);

    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FeatureBitset().set()))
    return Error(IDLoc, "instruction not supported on this GPU");

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
static bool isInvalidVOPDY(const OperandVector &Operands,
                           uint64_t InvalidOprIdx) {
  const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
  if (Op.isToken() && InvalidOprIdx > 1) {
    const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
    return PrevOp.isToken() && PrevOp.getToken() == "::";
  }
  return false;
}
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // Order match statuses from least to most specific and keep the most
    // specific one as the result.
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;

      if (isInvalidVOPDY(Operands, ErrorInfo))
        return Error(ErrorLoc, "invalid VOPDY instruction");
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}
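// Descriptive note (added): matching is retried once per assembler variant
// (e.g. default, VOP3, SDWA, DPP), keeping the most specific failure status
// so that the eventual diagnostic points at the real problem rather than at
// the first variant that failed to match.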
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t Tmp = -1;
  // ...
  if (getParser().parseAbsoluteExpression(Tmp)) {
    return true;
  }
  Ret = static_cast<uint32_t>(Tmp);
  return false;
}
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  if (!trySkipToken(AsmToken::Comma))
    return TokError("minor version number required, comma expected");

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    return true;

  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());

  return false;
}
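// Illustration (added): a module would typically open with
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"
// and the directive is rejected when that string disagrees with the target
// id the assembler was invoked with (e.g. via -mcpu / --triple).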
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
    unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
    SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // ... (IsaVersion and register counts derived here)
  unsigned MaxAddressableNumSGPRs =
      IsaInfo::getAddressableNumSGPRs(&getSTI());

  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);
  // ...
  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (Features.test(FeatureSGPRInitBug))
    NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
  // ...
}
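// Descriptive note (added): the directive parser re-derives the VGPR/SGPR
// "block" counts that go into the kernel descriptor from the highest register
// the kernel declared (.amdhsa_next_free_vgpr/_sgpr), adding the extra SGPRs
// implied by VCC, flat_scratch and XNACK, and range-checking against the
// addressable limits of the selected ISA version.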
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;
  // ...
  // Count the number of user SGPRs implied from the enabled feature bits.
  unsigned ImpliedUserSGPRCount = 0;

  // Track if the asm explicitly contains the directive for the user SGPR
  // count.
  std::optional<unsigned> ExplicitUserSGPRCount;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  std::optional<bool> EnableWavefrontSize32;

  while (true) {
    // ...
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      return true;

    if (ID == ".end_amdhsa_kernel")
      break;
    // ...
      return TokError(".amdhsa_ directives cannot be repeated");

    SMLoc ValStart = getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                          \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                          \
    return OutOfRangeError(RANGE);                                            \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
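    // Illustration (added): PARSE_BITS_ENTRY range-checks a directive value
    // against the width of the corresponding kernel-descriptor bit-field and
    // then stores it, so a kernel body such as
    //   .amdhsa_kernel my_kernel
    //     .amdhsa_next_free_vgpr 32
    //     .amdhsa_next_free_sgpr 16
    //     .amdhsa_system_vgpr_workitem_id 2
    //   .end_amdhsa_kernel
    // either updates the descriptor fields or fails with "value out of range"
    // at the offending operand.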
    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_kernarg_size") {
      if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.kernarg_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_count") {
      ExplicitUserSGPRCount = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
      if (!hasKernargPreload())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);

      if (Val > getMaxNumUserSGPRs())
        return OutOfRangeError(ValRange);
      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
                       ValRange);
      if (Val) {
        ImpliedUserSGPRCount += Val;
        PreloadLength = Val;
      }
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
      if (!hasKernargPreload())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);

      if (Val >= 1024)
        return OutOfRangeError(ValRange);
      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
                       ValRange);
      if (Val)
        PreloadOffset = Val;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      EnableWavefrontSize32 = Val;
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                       Val, ValRange);
    } else if (ID == ".amdhsa_uses_dynamic_stack") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
    } else if (ID == ".amdhsa_enable_private_segment") {
      if (!hasArchitectedFlatScratch())
        return Error(
            IDRange.Start,
            "directive is not supported without architected flat scratch",
            IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_accum_offset") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(
            IDRange.Start,
            ".amdhsa_reserve_xnack_mask does not match target id", IDRange);
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_tg_split") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS,
                       Val, ValRange);
    } else if (ID == ".amdhsa_shared_vgpr_count") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      SharedVGPRCount = Val;
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
                       COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,