53enum RegisterKind { IS_UNKNOWN,
IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
71 AMDGPUOperand(KindTy Kind_,
const AMDGPUAsmParser *AsmParser_)
72 :
Kind(Kind_), AsmParser(AsmParser_) {}
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
82 bool hasFPModifiers()
const {
return Abs || Neg; }
83 bool hasIntModifiers()
const {
return Sext; }
84 bool hasModifiers()
const {
return hasFPModifiers() || hasIntModifiers(); }
86 int64_t getFPModifiersOperand()
const {
93 int64_t getIntModifiersOperand()
const {
99 int64_t getModifiersOperand()
const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 &&
"fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers()) {
103 return getFPModifiersOperand();
104 }
else if (hasIntModifiers()) {
105 return getIntModifiersOperand();
188 ImmKindTyMandatoryLiteral,
202 mutable ImmKindTy
Kind;
219 bool isToken()
const override {
return Kind == Token; }
221 bool isSymbolRefExpr()
const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
225 bool isImm()
const override {
226 return Kind == Immediate;
229 void setImmKindNone()
const {
231 Imm.Kind = ImmKindTyNone;
234 void setImmKindLiteral()
const {
236 Imm.Kind = ImmKindTyLiteral;
239 void setImmKindMandatoryLiteral()
const {
241 Imm.Kind = ImmKindTyMandatoryLiteral;
244 void setImmKindConst()
const {
246 Imm.Kind = ImmKindTyConst;
249 bool IsImmKindLiteral()
const {
250 return isImm() &&
Imm.Kind == ImmKindTyLiteral;
253 bool IsImmKindMandatoryLiteral()
const {
254 return isImm() &&
Imm.Kind == ImmKindTyMandatoryLiteral;
257 bool isImmKindConst()
const {
258 return isImm() &&
Imm.Kind == ImmKindTyConst;
261 bool isInlinableImm(
MVT type)
const;
262 bool isLiteralImm(
MVT type)
const;
264 bool isRegKind()
const {
268 bool isReg()
const override {
269 return isRegKind() && !hasModifiers();
272 bool isRegOrInline(
unsigned RCID,
MVT type)
const {
273 return isRegClass(RCID) || isInlinableImm(type);
277 return isRegOrInline(RCID, type) || isLiteralImm(type);
280 bool isRegOrImmWithInt16InputMods()
const {
284 bool isRegOrImmWithIntT16InputMods()
const {
288 bool isRegOrImmWithInt32InputMods()
const {
292 bool isRegOrInlineImmWithInt16InputMods()
const {
293 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
296 bool isRegOrInlineImmWithInt32InputMods()
const {
297 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
300 bool isRegOrImmWithInt64InputMods()
const {
304 bool isRegOrImmWithFP16InputMods()
const {
308 bool isRegOrImmWithFPT16InputMods()
const {
312 bool isRegOrImmWithFP32InputMods()
const {
316 bool isRegOrImmWithFP64InputMods()
const {
320 template <
bool IsFake16>
bool isRegOrInlineImmWithFP16InputMods()
const {
321 return isRegOrInline(
322 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
325 bool isRegOrInlineImmWithFP32InputMods()
const {
326 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
329 bool isPackedFP16InputMods()
const {
333 bool isVReg()
const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
345 bool isVReg32()
const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
349 bool isVReg32OrOff()
const {
350 return isOff() || isVReg32();
353 bool isNull()
const {
354 return isRegKind() &&
getReg() == AMDGPU::SGPR_NULL;
357 bool isVRegWithInputMods()
const;
358 template <
bool IsFake16>
bool isT16VRegWithInputMods()
const;
360 bool isSDWAOperand(
MVT type)
const;
361 bool isSDWAFP16Operand()
const;
362 bool isSDWAFP32Operand()
const;
363 bool isSDWAInt16Operand()
const;
364 bool isSDWAInt32Operand()
const;
366 bool isImmTy(ImmTy ImmT)
const {
370 template <ImmTy Ty>
bool isImmTy()
const {
return isImmTy(Ty); }
372 bool isImmLiteral()
const {
return isImmTy(ImmTyNone); }
374 bool isImmModifier()
const {
375 return isImm() &&
Imm.Type != ImmTyNone;
378 bool isOModSI()
const {
return isImmTy(ImmTyOModSI); }
379 bool isDMask()
const {
return isImmTy(ImmTyDMask); }
380 bool isDim()
const {
return isImmTy(ImmTyDim); }
381 bool isR128A16()
const {
return isImmTy(ImmTyR128A16); }
382 bool isOff()
const {
return isImmTy(ImmTyOff); }
383 bool isExpTgt()
const {
return isImmTy(ImmTyExpTgt); }
384 bool isOffen()
const {
return isImmTy(ImmTyOffen); }
385 bool isIdxen()
const {
return isImmTy(ImmTyIdxen); }
386 bool isAddr64()
const {
return isImmTy(ImmTyAddr64); }
387 bool isOffset()
const {
return isImmTy(ImmTyOffset); }
388 bool isOffset0()
const {
return isImmTy(ImmTyOffset0); }
389 bool isOffset1()
const {
return isImmTy(ImmTyOffset1); }
390 bool isSMEMOffsetMod()
const {
return isImmTy(ImmTySMEMOffsetMod); }
391 bool isFlatOffset()
const {
return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
392 bool isGDS()
const {
return isImmTy(ImmTyGDS); }
393 bool isLDS()
const {
return isImmTy(ImmTyLDS); }
394 bool isCPol()
const {
return isImmTy(ImmTyCPol); }
395 bool isIndexKey8bit()
const {
return isImmTy(ImmTyIndexKey8bit); }
396 bool isIndexKey16bit()
const {
return isImmTy(ImmTyIndexKey16bit); }
397 bool isTFE()
const {
return isImmTy(ImmTyTFE); }
398 bool isFORMAT()
const {
return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
399 bool isDppBankMask()
const {
return isImmTy(ImmTyDppBankMask); }
400 bool isDppRowMask()
const {
return isImmTy(ImmTyDppRowMask); }
401 bool isDppBoundCtrl()
const {
return isImmTy(ImmTyDppBoundCtrl); }
402 bool isDppFI()
const {
return isImmTy(ImmTyDppFI); }
403 bool isSDWADstSel()
const {
return isImmTy(ImmTySDWADstSel); }
404 bool isSDWASrc0Sel()
const {
return isImmTy(ImmTySDWASrc0Sel); }
405 bool isSDWASrc1Sel()
const {
return isImmTy(ImmTySDWASrc1Sel); }
406 bool isSDWADstUnused()
const {
return isImmTy(ImmTySDWADstUnused); }
407 bool isInterpSlot()
const {
return isImmTy(ImmTyInterpSlot); }
408 bool isInterpAttr()
const {
return isImmTy(ImmTyInterpAttr); }
409 bool isInterpAttrChan()
const {
return isImmTy(ImmTyInterpAttrChan); }
410 bool isOpSel()
const {
return isImmTy(ImmTyOpSel); }
411 bool isOpSelHi()
const {
return isImmTy(ImmTyOpSelHi); }
412 bool isNegLo()
const {
return isImmTy(ImmTyNegLo); }
413 bool isNegHi()
const {
return isImmTy(ImmTyNegHi); }
414 bool isByteSel()
const {
return isImmTy(ImmTyByteSel); }
416 bool isRegOrImm()
const {
420 bool isRegClass(
unsigned RCID)
const;
424 bool isRegOrInlineNoMods(
unsigned RCID,
MVT type)
const {
425 return isRegOrInline(RCID, type) && !hasModifiers();
428 bool isSCSrcB16()
const {
429 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
432 bool isSCSrcV2B16()
const {
436 bool isSCSrc_b32()
const {
437 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
440 bool isSCSrc_b64()
const {
441 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
444 bool isBoolReg()
const;
446 bool isSCSrcF16()
const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
450 bool isSCSrcV2F16()
const {
454 bool isSCSrcF32()
const {
455 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
458 bool isSCSrcF64()
const {
459 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
462 bool isSSrc_b32()
const {
463 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
466 bool isSSrc_b16()
const {
return isSCSrcB16() || isLiteralImm(MVT::i16); }
468 bool isSSrcV2B16()
const {
473 bool isSSrc_b64()
const {
476 return isSCSrc_b64() || isLiteralImm(MVT::i64);
479 bool isSSrc_f32()
const {
480 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
483 bool isSSrcF64()
const {
return isSCSrc_b64() || isLiteralImm(MVT::f64); }
485 bool isSSrc_bf16()
const {
return isSCSrcB16() || isLiteralImm(MVT::bf16); }
487 bool isSSrc_f16()
const {
return isSCSrcB16() || isLiteralImm(MVT::f16); }
489 bool isSSrcV2F16()
const {
494 bool isSSrcV2FP32()
const {
499 bool isSCSrcV2FP32()
const {
504 bool isSSrcV2INT32()
const {
509 bool isSCSrcV2INT32()
const {
511 return isSCSrc_b32();
514 bool isSSrcOrLds_b32()
const {
515 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
516 isLiteralImm(MVT::i32) || isExpr();
519 bool isVCSrc_b32()
const {
520 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
523 bool isVCSrcB64()
const {
524 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
527 bool isVCSrcTB16()
const {
528 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
531 bool isVCSrcTB16_Lo128()
const {
532 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
535 bool isVCSrcFake16B16_Lo128()
const {
536 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
539 bool isVCSrc_b16()
const {
540 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
543 bool isVCSrc_v2b16()
const {
return isVCSrc_b16(); }
545 bool isVCSrc_f32()
const {
546 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
549 bool isVCSrcF64()
const {
550 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
553 bool isVCSrcTBF16()
const {
554 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
557 bool isVCSrcTF16()
const {
558 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
561 bool isVCSrcTBF16_Lo128()
const {
562 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
565 bool isVCSrcTF16_Lo128()
const {
566 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
569 bool isVCSrcFake16BF16_Lo128()
const {
570 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
573 bool isVCSrcFake16F16_Lo128()
const {
574 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
577 bool isVCSrc_bf16()
const {
578 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
581 bool isVCSrc_f16()
const {
582 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
585 bool isVCSrc_v2bf16()
const {
return isVCSrc_bf16(); }
587 bool isVCSrc_v2f16()
const {
return isVCSrc_f16(); }
589 bool isVSrc_b32()
const {
590 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
593 bool isVSrc_b64()
const {
return isVCSrcF64() || isLiteralImm(MVT::i64); }
595 bool isVSrcT_b16()
const {
return isVCSrcTB16() || isLiteralImm(MVT::i16); }
597 bool isVSrcT_b16_Lo128()
const {
598 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
601 bool isVSrcFake16_b16_Lo128()
const {
602 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
605 bool isVSrc_b16()
const {
return isVCSrc_b16() || isLiteralImm(MVT::i16); }
607 bool isVSrc_v2b16()
const {
return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
609 bool isVCSrcV2FP32()
const {
613 bool isVSrc_v2f32()
const {
return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
615 bool isVCSrcV2INT32()
const {
619 bool isVSrc_v2b32()
const {
return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
621 bool isVSrc_f32()
const {
622 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
625 bool isVSrc_f64()
const {
return isVCSrcF64() || isLiteralImm(MVT::f64); }
627 bool isVSrcT_bf16()
const {
return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
629 bool isVSrcT_f16()
const {
return isVCSrcTF16() || isLiteralImm(MVT::f16); }
631 bool isVSrcT_bf16_Lo128()
const {
632 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
635 bool isVSrcT_f16_Lo128()
const {
636 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
639 bool isVSrcFake16_bf16_Lo128()
const {
640 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
643 bool isVSrcFake16_f16_Lo128()
const {
644 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
647 bool isVSrc_bf16()
const {
return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
649 bool isVSrc_f16()
const {
return isVCSrc_f16() || isLiteralImm(MVT::f16); }
651 bool isVSrc_v2bf16()
const {
652 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
655 bool isVSrc_v2f16()
const {
return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
657 bool isVISrcB32()
const {
658 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
661 bool isVISrcB16()
const {
662 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
665 bool isVISrcV2B16()
const {
669 bool isVISrcF32()
const {
670 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
673 bool isVISrcF16()
const {
674 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
677 bool isVISrcV2F16()
const {
678 return isVISrcF16() || isVISrcB32();
681 bool isVISrc_64_bf16()
const {
682 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
685 bool isVISrc_64_f16()
const {
686 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
689 bool isVISrc_64_b32()
const {
690 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
693 bool isVISrc_64B64()
const {
694 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
697 bool isVISrc_64_f64()
const {
698 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
701 bool isVISrc_64V2FP32()
const {
702 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
705 bool isVISrc_64V2INT32()
const {
706 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
709 bool isVISrc_256_b32()
const {
710 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
713 bool isVISrc_256_f32()
const {
714 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
717 bool isVISrc_256B64()
const {
718 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
721 bool isVISrc_256_f64()
const {
722 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
725 bool isVISrc_128B16()
const {
726 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
729 bool isVISrc_128V2B16()
const {
730 return isVISrc_128B16();
733 bool isVISrc_128_b32()
const {
734 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
737 bool isVISrc_128_f32()
const {
738 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
741 bool isVISrc_256V2FP32()
const {
742 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
745 bool isVISrc_256V2INT32()
const {
746 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
749 bool isVISrc_512_b32()
const {
750 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
753 bool isVISrc_512B16()
const {
754 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
757 bool isVISrc_512V2B16()
const {
758 return isVISrc_512B16();
761 bool isVISrc_512_f32()
const {
762 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
765 bool isVISrc_512F16()
const {
766 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
769 bool isVISrc_512V2F16()
const {
770 return isVISrc_512F16() || isVISrc_512_b32();
773 bool isVISrc_1024_b32()
const {
774 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
777 bool isVISrc_1024B16()
const {
778 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
781 bool isVISrc_1024V2B16()
const {
782 return isVISrc_1024B16();
785 bool isVISrc_1024_f32()
const {
786 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
789 bool isVISrc_1024F16()
const {
790 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
793 bool isVISrc_1024V2F16()
const {
794 return isVISrc_1024F16() || isVISrc_1024_b32();
797 bool isAISrcB32()
const {
798 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
801 bool isAISrcB16()
const {
802 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
805 bool isAISrcV2B16()
const {
809 bool isAISrcF32()
const {
810 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
813 bool isAISrcF16()
const {
814 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
817 bool isAISrcV2F16()
const {
818 return isAISrcF16() || isAISrcB32();
821 bool isAISrc_64B64()
const {
822 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
825 bool isAISrc_64_f64()
const {
826 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
829 bool isAISrc_128_b32()
const {
830 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
833 bool isAISrc_128B16()
const {
834 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
837 bool isAISrc_128V2B16()
const {
838 return isAISrc_128B16();
841 bool isAISrc_128_f32()
const {
842 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
845 bool isAISrc_128F16()
const {
846 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
849 bool isAISrc_128V2F16()
const {
850 return isAISrc_128F16() || isAISrc_128_b32();
853 bool isVISrc_128_bf16()
const {
854 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
857 bool isVISrc_128_f16()
const {
858 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
861 bool isVISrc_128V2F16()
const {
862 return isVISrc_128_f16() || isVISrc_128_b32();
865 bool isAISrc_256B64()
const {
866 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
869 bool isAISrc_256_f64()
const {
870 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
873 bool isAISrc_512_b32()
const {
874 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
877 bool isAISrc_512B16()
const {
878 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
881 bool isAISrc_512V2B16()
const {
882 return isAISrc_512B16();
885 bool isAISrc_512_f32()
const {
886 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
889 bool isAISrc_512F16()
const {
890 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
893 bool isAISrc_512V2F16()
const {
894 return isAISrc_512F16() || isAISrc_512_b32();
897 bool isAISrc_1024_b32()
const {
898 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
901 bool isAISrc_1024B16()
const {
902 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
905 bool isAISrc_1024V2B16()
const {
906 return isAISrc_1024B16();
909 bool isAISrc_1024_f32()
const {
910 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
913 bool isAISrc_1024F16()
const {
914 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
917 bool isAISrc_1024V2F16()
const {
918 return isAISrc_1024F16() || isAISrc_1024_b32();
921 bool isKImmFP32()
const {
922 return isLiteralImm(MVT::f32);
925 bool isKImmFP16()
const {
926 return isLiteralImm(MVT::f16);
929 bool isMem()
const override {
933 bool isExpr()
const {
937 bool isSOPPBrTarget()
const {
return isExpr() ||
isImm(); }
939 bool isSWaitCnt()
const;
940 bool isDepCtr()
const;
941 bool isSDelayALU()
const;
942 bool isHwreg()
const;
943 bool isSendMsg()
const;
944 bool isSplitBarrier()
const;
945 bool isSwizzle()
const;
946 bool isSMRDOffset8()
const;
947 bool isSMEMOffset()
const;
948 bool isSMRDLiteralOffset()
const;
950 bool isDPPCtrl()
const;
954 bool isGPRIdxMode()
const;
955 bool isS16Imm()
const;
956 bool isU16Imm()
const;
957 bool isEndpgm()
const;
958 bool isWaitVDST()
const;
959 bool isWaitEXP()
const;
960 bool isWaitVAVDst()
const;
961 bool isWaitVMVSrc()
const;
963 auto getPredicate(std::function<
bool(
const AMDGPUOperand &
Op)>
P)
const {
964 return std::bind(
P, *
this);
972 int64_t getImm()
const {
977 void setImm(int64_t Val) {
982 ImmTy getImmTy()
const {
992 SMLoc getStartLoc()
const override {
996 SMLoc getEndLoc()
const override {
1001 return SMRange(StartLoc, EndLoc);
1004 Modifiers getModifiers()
const {
1005 assert(isRegKind() || isImmTy(ImmTyNone));
1006 return isRegKind() ?
Reg.Mods :
Imm.Mods;
1009 void setModifiers(Modifiers Mods) {
1010 assert(isRegKind() || isImmTy(ImmTyNone));
1017 bool hasModifiers()
const {
1018 return getModifiers().hasModifiers();
1021 bool hasFPModifiers()
const {
1022 return getModifiers().hasFPModifiers();
1025 bool hasIntModifiers()
const {
1026 return getModifiers().hasIntModifiers();
1031 void addImmOperands(
MCInst &Inst,
unsigned N,
bool ApplyModifiers =
true)
const;
1033 void addLiteralImmOperand(
MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const;
1035 void addRegOperands(
MCInst &Inst,
unsigned N)
const;
1037 void addRegOrImmOperands(
MCInst &Inst,
unsigned N)
const {
1039 addRegOperands(Inst,
N);
1041 addImmOperands(Inst,
N);
1044 void addRegOrImmWithInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1045 Modifiers Mods = getModifiers();
1048 addRegOperands(Inst,
N);
1050 addImmOperands(Inst,
N,
false);
1054 void addRegOrImmWithFPInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1055 assert(!hasIntModifiers());
1056 addRegOrImmWithInputModsOperands(Inst,
N);
1059 void addRegOrImmWithIntInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1060 assert(!hasFPModifiers());
1061 addRegOrImmWithInputModsOperands(Inst,
N);
1064 void addRegWithInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1065 Modifiers Mods = getModifiers();
1068 addRegOperands(Inst,
N);
1071 void addRegWithFPInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1072 assert(!hasIntModifiers());
1073 addRegWithInputModsOperands(Inst,
N);
1076 void addRegWithIntInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1077 assert(!hasFPModifiers());
1078 addRegWithInputModsOperands(Inst,
N);
1084 case ImmTyNone:
OS <<
"None";
break;
1085 case ImmTyGDS:
OS <<
"GDS";
break;
1086 case ImmTyLDS:
OS <<
"LDS";
break;
1087 case ImmTyOffen:
OS <<
"Offen";
break;
1088 case ImmTyIdxen:
OS <<
"Idxen";
break;
1089 case ImmTyAddr64:
OS <<
"Addr64";
break;
1090 case ImmTyOffset:
OS <<
"Offset";
break;
1091 case ImmTyInstOffset:
OS <<
"InstOffset";
break;
1092 case ImmTyOffset0:
OS <<
"Offset0";
break;
1093 case ImmTyOffset1:
OS <<
"Offset1";
break;
1094 case ImmTySMEMOffsetMod:
OS <<
"SMEMOffsetMod";
break;
1095 case ImmTyCPol:
OS <<
"CPol";
break;
1096 case ImmTyIndexKey8bit:
OS <<
"index_key";
break;
1097 case ImmTyIndexKey16bit:
OS <<
"index_key";
break;
1098 case ImmTyTFE:
OS <<
"TFE";
break;
1099 case ImmTyD16:
OS <<
"D16";
break;
1100 case ImmTyFORMAT:
OS <<
"FORMAT";
break;
1101 case ImmTyClampSI:
OS <<
"ClampSI";
break;
1102 case ImmTyOModSI:
OS <<
"OModSI";
break;
1103 case ImmTyDPP8:
OS <<
"DPP8";
break;
1104 case ImmTyDppCtrl:
OS <<
"DppCtrl";
break;
1105 case ImmTyDppRowMask:
OS <<
"DppRowMask";
break;
1106 case ImmTyDppBankMask:
OS <<
"DppBankMask";
break;
1107 case ImmTyDppBoundCtrl:
OS <<
"DppBoundCtrl";
break;
1108 case ImmTyDppFI:
OS <<
"DppFI";
break;
1109 case ImmTySDWADstSel:
OS <<
"SDWADstSel";
break;
1110 case ImmTySDWASrc0Sel:
OS <<
"SDWASrc0Sel";
break;
1111 case ImmTySDWASrc1Sel:
OS <<
"SDWASrc1Sel";
break;
1112 case ImmTySDWADstUnused:
OS <<
"SDWADstUnused";
break;
1113 case ImmTyDMask:
OS <<
"DMask";
break;
1114 case ImmTyDim:
OS <<
"Dim";
break;
1115 case ImmTyUNorm:
OS <<
"UNorm";
break;
1116 case ImmTyDA:
OS <<
"DA";
break;
1117 case ImmTyR128A16:
OS <<
"R128A16";
break;
1118 case ImmTyA16:
OS <<
"A16";
break;
1119 case ImmTyLWE:
OS <<
"LWE";
break;
1120 case ImmTyOff:
OS <<
"Off";
break;
1121 case ImmTyExpTgt:
OS <<
"ExpTgt";
break;
1122 case ImmTyExpCompr:
OS <<
"ExpCompr";
break;
1123 case ImmTyExpVM:
OS <<
"ExpVM";
break;
1124 case ImmTyHwreg:
OS <<
"Hwreg";
break;
1125 case ImmTySendMsg:
OS <<
"SendMsg";
break;
1126 case ImmTyInterpSlot:
OS <<
"InterpSlot";
break;
1127 case ImmTyInterpAttr:
OS <<
"InterpAttr";
break;
1128 case ImmTyInterpAttrChan:
OS <<
"InterpAttrChan";
break;
1129 case ImmTyOpSel:
OS <<
"OpSel";
break;
1130 case ImmTyOpSelHi:
OS <<
"OpSelHi";
break;
1131 case ImmTyNegLo:
OS <<
"NegLo";
break;
1132 case ImmTyNegHi:
OS <<
"NegHi";
break;
1133 case ImmTySwizzle:
OS <<
"Swizzle";
break;
1134 case ImmTyGprIdxMode:
OS <<
"GprIdxMode";
break;
1135 case ImmTyHigh:
OS <<
"High";
break;
1136 case ImmTyBLGP:
OS <<
"BLGP";
break;
1137 case ImmTyCBSZ:
OS <<
"CBSZ";
break;
1138 case ImmTyABID:
OS <<
"ABID";
break;
1139 case ImmTyEndpgm:
OS <<
"Endpgm";
break;
1140 case ImmTyWaitVDST:
OS <<
"WaitVDST";
break;
1141 case ImmTyWaitEXP:
OS <<
"WaitEXP";
break;
1142 case ImmTyWaitVAVDst:
OS <<
"WaitVAVDst";
break;
1143 case ImmTyWaitVMVSrc:
OS <<
"WaitVMVSrc";
break;
1144 case ImmTyByteSel:
OS <<
"ByteSel" ;
break;
1152 OS <<
"<register " <<
getReg() <<
" mods: " <<
Reg.Mods <<
'>';
1155 OS <<
'<' << getImm();
1156 if (getImmTy() != ImmTyNone) {
1157 OS <<
" type: "; printImmTy(
OS, getImmTy());
1159 OS <<
" mods: " <<
Imm.Mods <<
'>';
1162 OS <<
'\'' << getToken() <<
'\'';
1165 OS <<
"<expr " << *Expr <<
'>';
1170 static AMDGPUOperand::Ptr CreateImm(
const AMDGPUAsmParser *AsmParser,
1171 int64_t Val,
SMLoc Loc,
1172 ImmTy
Type = ImmTyNone,
1173 bool IsFPImm =
false) {
1174 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1176 Op->Imm.IsFPImm = IsFPImm;
1177 Op->Imm.Kind = ImmKindTyNone;
1179 Op->Imm.Mods = Modifiers();
1185 static AMDGPUOperand::Ptr CreateToken(
const AMDGPUAsmParser *AsmParser,
1187 bool HasExplicitEncodingSize =
true) {
1188 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1189 Res->Tok.Data = Str.data();
1190 Res->Tok.Length = Str.size();
1191 Res->StartLoc = Loc;
1196 static AMDGPUOperand::Ptr CreateReg(
const AMDGPUAsmParser *AsmParser,
1197 unsigned RegNo,
SMLoc S,
1199 auto Op = std::make_unique<AMDGPUOperand>(
Register, AsmParser);
1200 Op->Reg.RegNo = RegNo;
1201 Op->Reg.Mods = Modifiers();
1207 static AMDGPUOperand::Ptr CreateExpr(
const AMDGPUAsmParser *AsmParser,
1209 auto Op = std::make_unique<AMDGPUOperand>(
Expression, AsmParser);
1218 OS <<
"abs:" << Mods.Abs <<
" neg: " << Mods.Neg <<
" sext:" << Mods.Sext;
1229class KernelScopeInfo {
1230 int SgprIndexUnusedMin = -1;
1231 int VgprIndexUnusedMin = -1;
1232 int AgprIndexUnusedMin = -1;
1236 void usesSgprAt(
int i) {
1237 if (i >= SgprIndexUnusedMin) {
1238 SgprIndexUnusedMin = ++i;
1247 void usesVgprAt(
int i) {
1248 if (i >= VgprIndexUnusedMin) {
1249 VgprIndexUnusedMin = ++i;
1254 VgprIndexUnusedMin);
1260 void usesAgprAt(
int i) {
1265 if (i >= AgprIndexUnusedMin) {
1266 AgprIndexUnusedMin = ++i;
1276 VgprIndexUnusedMin);
1283 KernelScopeInfo() =
default;
1289 usesSgprAt(SgprIndexUnusedMin = -1);
1290 usesVgprAt(VgprIndexUnusedMin = -1);
1292 usesAgprAt(AgprIndexUnusedMin = -1);
1296 void usesRegister(RegisterKind RegKind,
unsigned DwordRegIndex,
1297 unsigned RegWidth) {
1300 usesSgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1303 usesAgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1306 usesVgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1317 unsigned ForcedEncodingSize = 0;
1318 bool ForcedDPP =
false;
1319 bool ForcedSDWA =
false;
1320 KernelScopeInfo KernelScope;
1325#define GET_ASSEMBLER_HEADER
1326#include "AMDGPUGenAsmMatcher.inc"
1331 bool ParseAsAbsoluteExpression(
uint32_t &Ret);
1332 bool OutOfRangeError(
SMRange Range);
1348 bool calculateGPRBlocks(
const FeatureBitset &Features,
bool VCCUsed,
1349 bool FlatScrUsed,
bool XNACKUsed,
1350 std::optional<bool> EnableWavefrontSize32,
1351 unsigned NextFreeVGPR,
SMRange VGPRRange,
1352 unsigned NextFreeSGPR,
SMRange SGPRRange,
1353 unsigned &VGPRBlocks,
unsigned &SGPRBlocks);
1354 bool ParseDirectiveAMDGCNTarget();
1355 bool ParseDirectiveAMDHSACodeObjectVersion();
1356 bool ParseDirectiveAMDHSAKernel();
1358 bool ParseDirectiveAMDKernelCodeT();
1361 bool ParseDirectiveAMDGPUHsaKernel();
1363 bool ParseDirectiveISAVersion();
1364 bool ParseDirectiveHSAMetadata();
1365 bool ParseDirectivePALMetadataBegin();
1366 bool ParseDirectivePALMetadata();
1367 bool ParseDirectiveAMDGPULDS();
1371 bool ParseToEndDirective(
const char *AssemblerDirectiveBegin,
1372 const char *AssemblerDirectiveEnd,
1373 std::string &CollectString);
1375 bool AddNextRegisterToList(
unsigned& Reg,
unsigned& RegWidth,
1376 RegisterKind RegKind,
unsigned Reg1,
SMLoc Loc);
1377 bool ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
1378 unsigned &RegNum,
unsigned &RegWidth,
1379 bool RestoreOnFailure =
false);
1380 bool ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
1381 unsigned &RegNum,
unsigned &RegWidth,
1383 unsigned ParseRegularReg(RegisterKind &RegKind,
unsigned &RegNum,
1386 unsigned ParseSpecialReg(RegisterKind &RegKind,
unsigned &RegNum,
1389 unsigned ParseRegList(RegisterKind &RegKind,
unsigned &RegNum,
1391 bool ParseRegRange(
unsigned& Num,
unsigned& Width);
1392 unsigned getRegularReg(RegisterKind RegKind,
unsigned RegNum,
unsigned SubReg,
1393 unsigned RegWidth,
SMLoc Loc);
1397 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1398 void initializeGprCountSymbol(RegisterKind RegKind);
1399 bool updateGprCountSymbols(RegisterKind RegKind,
unsigned DwordRegIndex,
1405 enum AMDGPUMatchResultTy {
1406 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1409 OperandMode_Default,
1413 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1421 if (getFeatureBits().
none()) {
1453 initializeGprCountSymbol(IS_VGPR);
1454 initializeGprCountSymbol(IS_SGPR);
1527 bool hasInv2PiInlineImm()
const {
1528 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1531 bool hasFlatOffsets()
const {
1532 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1536 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1539 bool hasSGPR102_SGPR103()
const {
1543 bool hasSGPR104_SGPR105()
const {
return isGFX10Plus(); }
1545 bool hasIntClamp()
const {
1546 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1549 bool hasPartialNSAEncoding()
const {
1550 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1582 void setForcedEncodingSize(
unsigned Size) { ForcedEncodingSize =
Size; }
1583 void setForcedDPP(
bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1584 void setForcedSDWA(
bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1586 unsigned getForcedEncodingSize()
const {
return ForcedEncodingSize; }
1587 bool isForcedVOP3()
const {
return ForcedEncodingSize == 64; }
1588 bool isForcedDPP()
const {
return ForcedDPP; }
1589 bool isForcedSDWA()
const {
return ForcedSDWA; }
1591 StringRef getMatchedVariantName()
const;
1593 std::unique_ptr<AMDGPUOperand>
parseRegister(
bool RestoreOnFailure =
false);
1595 bool RestoreOnFailure);
1598 SMLoc &EndLoc)
override;
1601 unsigned Kind)
override;
1605 bool MatchingInlineAsm)
override;
1608 OperandMode Mode = OperandMode_Default);
1616 ParseStatus parseIntWithPrefix(
const char *Prefix, int64_t &
Int);
1620 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1621 std::function<
bool(int64_t &)> ConvertResult =
nullptr);
1625 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1626 bool (*ConvertResult)(int64_t &) =
nullptr);
1630 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1639 bool isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1640 bool isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1641 bool isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1642 bool isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1643 bool parseSP3NegModifier();
1645 bool HasLit =
false);
1648 bool HasLit =
false);
1650 bool AllowImm =
true);
1652 bool AllowImm =
true);
1657 AMDGPUOperand::ImmTy ImmTy);
1668 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1673 bool tryParseFmt(
const char *Pref, int64_t MaxVal, int64_t &Val);
1674 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt,
StringRef FormatStr,
SMLoc Loc);
1678 bool parseCnt(int64_t &IntVal);
1681 bool parseDepCtr(int64_t &IntVal,
unsigned &Mask);
1685 bool parseDelay(int64_t &Delay);
1691 struct OperandInfoTy {
1694 bool IsSymbolic =
false;
1695 bool IsDefined =
false;
1697 OperandInfoTy(int64_t Val) : Val(Val) {}
1700 struct StructuredOpField : OperandInfoTy {
1704 bool IsDefined =
false;
1709 virtual ~StructuredOpField() =
default;
1711 bool Error(AMDGPUAsmParser &Parser,
const Twine &Err)
const {
1712 Parser.Error(Loc,
"invalid " +
Desc +
": " + Err);
1716 virtual bool validate(AMDGPUAsmParser &Parser)
const {
1718 return Error(Parser,
"not supported on this GPU");
1720 return Error(Parser,
"only " +
Twine(Width) +
"-bit values are legal");
1728 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &
Op, OperandInfoTy &Stream);
1729 bool validateSendMsg(
const OperandInfoTy &Msg,
1730 const OperandInfoTy &
Op,
1731 const OperandInfoTy &Stream);
1734 OperandInfoTy &Width);
1740 SMLoc getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
1745 bool SearchMandatoryLiterals =
false)
const;
1754 bool validateSOPLiteral(
const MCInst &Inst)
const;
1756 bool validateVOPDRegBankConstraints(
const MCInst &Inst,
1758 bool validateIntClampSupported(
const MCInst &Inst);
1759 bool validateMIMGAtomicDMask(
const MCInst &Inst);
1760 bool validateMIMGGatherDMask(
const MCInst &Inst);
1762 bool validateMIMGDataSize(
const MCInst &Inst,
const SMLoc &IDLoc);
1763 bool validateMIMGAddrSize(
const MCInst &Inst,
const SMLoc &IDLoc);
1764 bool validateMIMGD16(
const MCInst &Inst);
1765 bool validateMIMGMSAA(
const MCInst &Inst);
1766 bool validateOpSel(
const MCInst &Inst);
1769 bool validateVccOperand(
unsigned Reg)
const;
1774 bool validateAGPRLdSt(
const MCInst &Inst)
const;
1775 bool validateVGPRAlign(
const MCInst &Inst)
const;
1779 bool validateDivScale(
const MCInst &Inst);
1782 const SMLoc &IDLoc);
1784 const unsigned CPol);
1787 std::optional<StringRef> validateLdsDirect(
const MCInst &Inst);
1788 unsigned getConstantBusLimit(
unsigned Opcode)
const;
1789 bool usesConstantBus(
const MCInst &Inst,
unsigned OpIdx);
1790 bool isInlineConstant(
const MCInst &Inst,
unsigned OpIdx)
const;
1791 unsigned findImplicitSGPRReadInVOP(
const MCInst &Inst)
const;
1817 AsmToken peekToken(
bool ShouldSkipSpace =
true);
1819 SMLoc getLoc()
const;
1823 void onBeginOfFile()
override;
1824 bool parsePrimaryExpr(
const MCExpr *&Res,
SMLoc &EndLoc)
override;
1835 bool parseSwizzleOperand(int64_t &
Op,
1836 const unsigned MinVal,
1837 const unsigned MaxVal,
1840 bool parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
1841 const unsigned MinVal,
1842 const unsigned MaxVal,
1845 bool parseSwizzleOffset(int64_t &Imm);
1846 bool parseSwizzleMacro(int64_t &Imm);
1847 bool parseSwizzleQuadPerm(int64_t &Imm);
1848 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1849 bool parseSwizzleBroadcast(int64_t &Imm);
1850 bool parseSwizzleSwap(int64_t &Imm);
1851 bool parseSwizzleReverse(int64_t &Imm);
1854 int64_t parseGPRIdxMacro();
1862 OptionalImmIndexMap &OptionalIdx);
1870 OptionalImmIndexMap &OptionalIdx);
1872 OptionalImmIndexMap &OptionalIdx);
1877 bool parseDimId(
unsigned &Encoding);
1879 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1883 int64_t parseDPPCtrlSel(
StringRef Ctrl);
1884 int64_t parseDPPCtrlPerm();
1890 bool IsDPP8 =
false);
1896 AMDGPUOperand::ImmTy
Type);
1905 bool SkipDstVcc =
false,
1906 bool SkipSrcVcc =
false);
1919 return &APFloat::IEEEsingle();
1921 return &APFloat::IEEEdouble();
1923 return &APFloat::IEEEhalf();
1956 return &APFloat::IEEEsingle();
1962 return &APFloat::IEEEdouble();
1971 return &APFloat::IEEEhalf();
1979 return &APFloat::BFloat();
1994 APFloat::rmNearestTiesToEven,
1997 if (
Status != APFloat::opOK &&
1999 ((
Status & APFloat::opOverflow) != 0 ||
2000 (
Status & APFloat::opUnderflow) != 0)) {
2023bool AMDGPUOperand::isInlinableImm(
MVT type)
const {
2033 if (!isImmTy(ImmTyNone)) {
2044 if (type == MVT::f64 || type == MVT::i64) {
2046 AsmParser->hasInv2PiInlineImm());
2068 APFloat::rmNearestTiesToEven, &Lost);
2075 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2077 AsmParser->hasInv2PiInlineImm());
2082 static_cast<int32_t
>(FPLiteral.bitcastToAPInt().getZExtValue()),
2083 AsmParser->hasInv2PiInlineImm());
2087 if (type == MVT::f64 || type == MVT::i64) {
2089 AsmParser->hasInv2PiInlineImm());
2098 static_cast<int16_t
>(
Literal.getLoBits(16).getSExtValue()),
2099 type, AsmParser->hasInv2PiInlineImm());
2103 static_cast<int32_t
>(
Literal.getLoBits(32).getZExtValue()),
2104 AsmParser->hasInv2PiInlineImm());
2107bool AMDGPUOperand::isLiteralImm(
MVT type)
const {
2109 if (!isImmTy(ImmTyNone)) {
2116 if (type == MVT::f64 && hasFPModifiers()) {
2133 if (type == MVT::f64) {
2138 if (type == MVT::i64) {
2151 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2152 : (type == MVT::v2i16) ? MVT::f32
2153 : (type == MVT::v2f32) ? MVT::f32
2160bool AMDGPUOperand::isRegClass(
unsigned RCID)
const {
2161 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(
getReg());
2164bool AMDGPUOperand::isVRegWithInputMods()
const {
2165 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2167 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2168 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2171template <
bool IsFake16>
bool AMDGPUOperand::isT16VRegWithInputMods()
const {
2172 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2173 : AMDGPU::VGPR_16_Lo128RegClassID);
2176bool AMDGPUOperand::isSDWAOperand(
MVT type)
const {
2177 if (AsmParser->isVI())
2179 else if (AsmParser->isGFX9Plus())
2180 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2185bool AMDGPUOperand::isSDWAFP16Operand()
const {
2186 return isSDWAOperand(MVT::f16);
2189bool AMDGPUOperand::isSDWAFP32Operand()
const {
2190 return isSDWAOperand(MVT::f32);
2193bool AMDGPUOperand::isSDWAInt16Operand()
const {
2194 return isSDWAOperand(MVT::i16);
2197bool AMDGPUOperand::isSDWAInt32Operand()
const {
2198 return isSDWAOperand(MVT::i32);
2201bool AMDGPUOperand::isBoolReg()
const {
2202 auto FB = AsmParser->getFeatureBits();
2203 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2204 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2209 assert(isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2224void AMDGPUOperand::addImmOperands(
MCInst &Inst,
unsigned N,
bool ApplyModifiers)
const {
2232 addLiteralImmOperand(Inst,
Imm.Val,
2234 isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2236 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2242void AMDGPUOperand::addLiteralImmOperand(
MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const {
2243 const auto& InstDesc = AsmParser->getMII()->get(Inst.
getOpcode());
2248 if (ApplyModifiers) {
2251 Val = applyInputFPModifiers(Val,
Size);
2255 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2265 AsmParser->hasInv2PiInlineImm())) {
2274 if (
Literal.getLoBits(32) != 0) {
2275 const_cast<AMDGPUAsmParser *
>(AsmParser)->
Warning(Inst.
getLoc(),
2276 "Can't encode literal as exact 64-bit floating-point operand. "
2277 "Low 32-bits will be set to zero");
2278 Val &= 0xffffffff00000000u;
2282 setImmKindLiteral();
2298 if (AsmParser->hasInv2PiInlineImm() &&
Literal == 0x3fc45f306725feed) {
2304 setImmKindLiteral();
2340 APFloat::rmNearestTiesToEven, &lost);
2344 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2347 setImmKindMandatoryLiteral();
2349 setImmKindLiteral();
2380 AsmParser->hasInv2PiInlineImm())) {
2387 setImmKindLiteral();
2405 setImmKindLiteral();
2419 setImmKindLiteral();
2428 AsmParser->hasInv2PiInlineImm())) {
2435 setImmKindLiteral();
2444 AsmParser->hasInv2PiInlineImm())) {
2451 setImmKindLiteral();
2465 AsmParser->hasInv2PiInlineImm()));
2475 AsmParser->hasInv2PiInlineImm()));
2483 setImmKindMandatoryLiteral();
2487 setImmKindMandatoryLiteral();
2494void AMDGPUOperand::addRegOperands(
MCInst &Inst,
unsigned N)
const {
2498bool AMDGPUOperand::isInlineValue()
const {
2507 if (Is == IS_VGPR) {
2511 return AMDGPU::VGPR_32RegClassID;
2513 return AMDGPU::VReg_64RegClassID;
2515 return AMDGPU::VReg_96RegClassID;
2517 return AMDGPU::VReg_128RegClassID;
2519 return AMDGPU::VReg_160RegClassID;
2521 return AMDGPU::VReg_192RegClassID;
2523 return AMDGPU::VReg_224RegClassID;
2525 return AMDGPU::VReg_256RegClassID;
2527 return AMDGPU::VReg_288RegClassID;
2529 return AMDGPU::VReg_320RegClassID;
2531 return AMDGPU::VReg_352RegClassID;
2533 return AMDGPU::VReg_384RegClassID;
2535 return AMDGPU::VReg_512RegClassID;
2537 return AMDGPU::VReg_1024RegClassID;
2539 }
else if (Is == IS_TTMP) {
2543 return AMDGPU::TTMP_32RegClassID;
2545 return AMDGPU::TTMP_64RegClassID;
2547 return AMDGPU::TTMP_128RegClassID;
2549 return AMDGPU::TTMP_256RegClassID;
2551 return AMDGPU::TTMP_512RegClassID;
2553 }
else if (Is == IS_SGPR) {
2557 return AMDGPU::SGPR_32RegClassID;
2559 return AMDGPU::SGPR_64RegClassID;
2561 return AMDGPU::SGPR_96RegClassID;
2563 return AMDGPU::SGPR_128RegClassID;
2565 return AMDGPU::SGPR_160RegClassID;
2567 return AMDGPU::SGPR_192RegClassID;
2569 return AMDGPU::SGPR_224RegClassID;
2571 return AMDGPU::SGPR_256RegClassID;
2573 return AMDGPU::SGPR_288RegClassID;
2575 return AMDGPU::SGPR_320RegClassID;
2577 return AMDGPU::SGPR_352RegClassID;
2579 return AMDGPU::SGPR_384RegClassID;
2581 return AMDGPU::SGPR_512RegClassID;
2583 }
else if (Is == IS_AGPR) {
2587 return AMDGPU::AGPR_32RegClassID;
2589 return AMDGPU::AReg_64RegClassID;
2591 return AMDGPU::AReg_96RegClassID;
2593 return AMDGPU::AReg_128RegClassID;
2595 return AMDGPU::AReg_160RegClassID;
2597 return AMDGPU::AReg_192RegClassID;
2599 return AMDGPU::AReg_224RegClassID;
2601 return AMDGPU::AReg_256RegClassID;
2603 return AMDGPU::AReg_288RegClassID;
2605 return AMDGPU::AReg_320RegClassID;
2607 return AMDGPU::AReg_352RegClassID;
2609 return AMDGPU::AReg_384RegClassID;
2611 return AMDGPU::AReg_512RegClassID;
2613 return AMDGPU::AReg_1024RegClassID;
2621 .
Case(
"exec", AMDGPU::EXEC)
2622 .
Case(
"vcc", AMDGPU::VCC)
2623 .
Case(
"flat_scratch", AMDGPU::FLAT_SCR)
2624 .
Case(
"xnack_mask", AMDGPU::XNACK_MASK)
2625 .
Case(
"shared_base", AMDGPU::SRC_SHARED_BASE)
2626 .
Case(
"src_shared_base", AMDGPU::SRC_SHARED_BASE)
2627 .
Case(
"shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2628 .
Case(
"src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2629 .
Case(
"private_base", AMDGPU::SRC_PRIVATE_BASE)
2630 .
Case(
"src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2631 .
Case(
"private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2632 .
Case(
"src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2633 .
Case(
"pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2634 .
Case(
"src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2635 .
Case(
"lds_direct", AMDGPU::LDS_DIRECT)
2636 .
Case(
"src_lds_direct", AMDGPU::LDS_DIRECT)
2637 .
Case(
"m0", AMDGPU::M0)
2638 .
Case(
"vccz", AMDGPU::SRC_VCCZ)
2639 .
Case(
"src_vccz", AMDGPU::SRC_VCCZ)
2640 .
Case(
"execz", AMDGPU::SRC_EXECZ)
2641 .
Case(
"src_execz", AMDGPU::SRC_EXECZ)
2642 .
Case(
"scc", AMDGPU::SRC_SCC)
2643 .
Case(
"src_scc", AMDGPU::SRC_SCC)
2644 .
Case(
"tba", AMDGPU::TBA)
2645 .
Case(
"tma", AMDGPU::TMA)
2646 .
Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2647 .
Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2648 .
Case(
"xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2649 .
Case(
"xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2650 .
Case(
"vcc_lo", AMDGPU::VCC_LO)
2651 .
Case(
"vcc_hi", AMDGPU::VCC_HI)
2652 .
Case(
"exec_lo", AMDGPU::EXEC_LO)
2653 .
Case(
"exec_hi", AMDGPU::EXEC_HI)
2654 .
Case(
"tma_lo", AMDGPU::TMA_LO)
2655 .
Case(
"tma_hi", AMDGPU::TMA_HI)
2656 .
Case(
"tba_lo", AMDGPU::TBA_LO)
2657 .
Case(
"tba_hi", AMDGPU::TBA_HI)
2658 .
Case(
"pc", AMDGPU::PC_REG)
2659 .
Case(
"null", AMDGPU::SGPR_NULL)
2663bool AMDGPUAsmParser::ParseRegister(
MCRegister &RegNo,
SMLoc &StartLoc,
2664 SMLoc &EndLoc,
bool RestoreOnFailure) {
2665 auto R = parseRegister();
2666 if (!R)
return true;
2668 RegNo =
R->getReg();
2669 StartLoc =
R->getStartLoc();
2670 EndLoc =
R->getEndLoc();
2676 return ParseRegister(Reg, StartLoc, EndLoc,
false);
2681 bool Result = ParseRegister(Reg, StartLoc, EndLoc,
true);
2682 bool PendingErrors = getParser().hasPendingError();
2683 getParser().clearPendingErrors();
2691bool AMDGPUAsmParser::AddNextRegisterToList(
unsigned &Reg,
unsigned &RegWidth,
2692 RegisterKind RegKind,
unsigned Reg1,
2696 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2701 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2702 Reg = AMDGPU::FLAT_SCR;
2706 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2707 Reg = AMDGPU::XNACK_MASK;
2711 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2716 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2721 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2726 Error(Loc,
"register does not fit in the list");
2732 if (Reg1 != Reg + RegWidth / 32) {
2733 Error(Loc,
"registers in a list must have consecutive indices");
2751 {{
"ttmp"}, IS_TTMP},
2757 return Kind == IS_VGPR ||
2765 if (Str.starts_with(Reg.Name))
2771 return !Str.getAsInteger(10, Num);
2775AMDGPUAsmParser::isRegister(
const AsmToken &Token,
2792 if (!RegSuffix.
empty()) {
2810AMDGPUAsmParser::isRegister()
2812 return isRegister(getToken(), peekToken());
2815unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
unsigned RegNum,
2816 unsigned SubReg,
unsigned RegWidth,
2820 unsigned AlignSize = 1;
2821 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2827 if (RegNum % AlignSize != 0) {
2828 Error(Loc,
"invalid register alignment");
2829 return AMDGPU::NoRegister;
2832 unsigned RegIdx = RegNum / AlignSize;
2835 Error(Loc,
"invalid or unsupported register size");
2836 return AMDGPU::NoRegister;
2842 Error(Loc,
"register index is out of range");
2843 return AMDGPU::NoRegister;
2853 assert(Reg &&
"Invalid subregister!");
2859bool AMDGPUAsmParser::ParseRegRange(
unsigned &Num,
unsigned &RegWidth) {
2860 int64_t RegLo, RegHi;
2864 SMLoc FirstIdxLoc = getLoc();
2867 if (!parseExpr(RegLo))
2871 SecondIdxLoc = getLoc();
2872 if (!parseExpr(RegHi))
2881 if (!isUInt<32>(RegLo)) {
2882 Error(FirstIdxLoc,
"invalid register index");
2886 if (!isUInt<32>(RegHi)) {
2887 Error(SecondIdxLoc,
"invalid register index");
2891 if (RegLo > RegHi) {
2892 Error(FirstIdxLoc,
"first register index should not exceed second index");
2896 Num =
static_cast<unsigned>(RegLo);
2897 RegWidth = 32 * ((RegHi - RegLo) + 1);
2901unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2902 unsigned &RegNum,
unsigned &RegWidth,
2909 RegKind = IS_SPECIAL;
2916unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2917 unsigned &RegNum,
unsigned &RegWidth,
2921 auto Loc = getLoc();
2925 Error(Loc,
"invalid register name");
2926 return AMDGPU::NoRegister;
2934 unsigned SubReg = NoSubRegister;
2935 if (!RegSuffix.
empty()) {
2947 Error(Loc,
"invalid register index");
2948 return AMDGPU::NoRegister;
2953 if (!ParseRegRange(RegNum, RegWidth))
2954 return AMDGPU::NoRegister;
2957 return getRegularReg(RegKind, RegNum,
SubReg, RegWidth, Loc);
2960unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
unsigned &RegNum,
2963 unsigned Reg = AMDGPU::NoRegister;
2964 auto ListLoc = getLoc();
2967 "expected a register or a list of registers")) {
2968 return AMDGPU::NoRegister;
2973 auto Loc = getLoc();
2974 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2975 return AMDGPU::NoRegister;
2976 if (RegWidth != 32) {
2977 Error(Loc,
"expected a single 32-bit register");
2978 return AMDGPU::NoRegister;
2982 RegisterKind NextRegKind;
2983 unsigned NextReg, NextRegNum, NextRegWidth;
2986 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2987 NextRegNum, NextRegWidth,
2989 return AMDGPU::NoRegister;
2991 if (NextRegWidth != 32) {
2992 Error(Loc,
"expected a single 32-bit register");
2993 return AMDGPU::NoRegister;
2995 if (NextRegKind != RegKind) {
2996 Error(Loc,
"registers in a list must be of the same kind");
2997 return AMDGPU::NoRegister;
2999 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3000 return AMDGPU::NoRegister;
3004 "expected a comma or a closing square bracket")) {
3005 return AMDGPU::NoRegister;
3009 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3014bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
3015 unsigned &RegNum,
unsigned &RegWidth,
3017 auto Loc = getLoc();
3018 Reg = AMDGPU::NoRegister;
3021 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3022 if (Reg == AMDGPU::NoRegister)
3023 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3025 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3029 if (Reg == AMDGPU::NoRegister) {
3030 assert(Parser.hasPendingError());
3034 if (!subtargetHasRegister(*
TRI, Reg)) {
3035 if (Reg == AMDGPU::SGPR_NULL) {
3036 Error(Loc,
"'null' operand is not supported on this GPU");
3038 Error(Loc,
"register not available on this GPU");
3046bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
3047 unsigned &RegNum,
unsigned &RegWidth,
3048 bool RestoreOnFailure ) {
3049 Reg = AMDGPU::NoRegister;
3052 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3053 if (RestoreOnFailure) {
3054 while (!Tokens.
empty()) {
3063std::optional<StringRef>
3064AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3067 return StringRef(
".amdgcn.next_free_vgpr");
3069 return StringRef(
".amdgcn.next_free_sgpr");
3071 return std::nullopt;
3075void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3076 auto SymbolName = getGprCountSymbolName(RegKind);
3077 assert(SymbolName &&
"initializing invalid register kind");
3078 MCSymbol *
Sym = getContext().getOrCreateSymbol(*SymbolName);
3082bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3083 unsigned DwordRegIndex,
3084 unsigned RegWidth) {
3089 auto SymbolName = getGprCountSymbolName(RegKind);
3092 MCSymbol *
Sym = getContext().getOrCreateSymbol(*SymbolName);
3094 int64_t NewMax = DwordRegIndex +
divideCeil(RegWidth, 32) - 1;
3097 if (!
Sym->isVariable())
3098 return !
Error(getLoc(),
3099 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3100 if (!
Sym->getVariableValue(
false)->evaluateAsAbsolute(OldCount))
3103 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3105 if (OldCount <= NewMax)
3111std::unique_ptr<AMDGPUOperand>
3112AMDGPUAsmParser::parseRegister(
bool RestoreOnFailure) {
3113 const auto &Tok = getToken();
3114 SMLoc StartLoc = Tok.getLoc();
3115 SMLoc EndLoc = Tok.getEndLoc();
3116 RegisterKind RegKind;
3117 unsigned Reg, RegNum, RegWidth;
3119 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3123 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3126 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3127 return AMDGPUOperand::CreateReg(
this, Reg, StartLoc, EndLoc);
3131 bool HasSP3AbsModifier,
bool HasLit) {
3139 HasLit = trySkipId(
"lit");
3151 const auto& Tok = getToken();
3152 const auto& NextTok = peekToken();
3155 bool Negate =
false;
3163 AMDGPUOperand::Modifiers Mods;
3174 APFloat RealVal(APFloat::IEEEdouble());
3175 auto roundMode = APFloat::rmNearestTiesToEven;
3176 if (
errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3179 RealVal.changeSign();
3182 AMDGPUOperand::CreateImm(
this, RealVal.bitcastToAPInt().getZExtValue(), S,
3183 AMDGPUOperand::ImmTyNone,
true));
3184 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3185 Op.setModifiers(Mods);
3194 if (HasSP3AbsModifier) {
3203 if (getParser().parsePrimaryExpr(Expr, EndLoc,
nullptr))
3206 if (Parser.parseExpression(Expr))
3210 if (Expr->evaluateAsAbsolute(IntVal)) {
3211 Operands.push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
3212 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3213 Op.setModifiers(Mods);
3217 Operands.push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
3230 if (
auto R = parseRegister()) {
3239 bool HasSP3AbsMod,
bool HasLit) {
3245 return parseImm(
Operands, HasSP3AbsMod, HasLit);
3249AMDGPUAsmParser::isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3252 return str ==
"abs" || str ==
"neg" || str ==
"sext";
3258AMDGPUAsmParser::isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3263AMDGPUAsmParser::isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3264 return isNamedOperandModifier(Token, NextToken) || Token.
is(
AsmToken::Pipe);
3268AMDGPUAsmParser::isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3269 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3286AMDGPUAsmParser::isModifier() {
3290 peekTokens(NextToken);
3292 return isOperandModifier(Tok, NextToken[0]) ||
3293 (Tok.
is(
AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3294 isOpcodeModifierWithVal(Tok, NextToken[0]);
3320AMDGPUAsmParser::parseSP3NegModifier() {
3323 peekTokens(NextToken);
3326 (isRegister(NextToken[0], NextToken[1]) ||
3328 isId(NextToken[0],
"abs"))) {
3346 return Error(getLoc(),
"invalid syntax, expected 'neg' modifier");
3348 SP3Neg = parseSP3NegModifier();
3351 Neg = trySkipId(
"neg");
3353 return Error(Loc,
"expected register or immediate");
3357 Abs = trySkipId(
"abs");
3361 Lit = trySkipId(
"lit");
3368 return Error(Loc,
"expected register or immediate");
3372 Res = parseRegOrImm(
Operands, SP3Abs, Lit);
3379 if (Lit && !
Operands.back()->isImm())
3380 Error(Loc,
"expected immediate with lit modifier");
3382 if (SP3Abs && !skipToken(
AsmToken::Pipe,
"expected vertical bar"))
3391 AMDGPUOperand::Modifiers Mods;
3392 Mods.Abs = Abs || SP3Abs;
3393 Mods.Neg = Neg || SP3Neg;
3396 if (Mods.hasFPModifiers() || Lit) {
3397 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3399 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3400 Op.setModifiers(Mods);
3408 bool Sext = trySkipId(
"sext");
3409 if (Sext && !skipToken(
AsmToken::LParen,
"expected left paren after sext"))
3424 AMDGPUOperand::Modifiers Mods;
3427 if (Mods.hasIntModifiers()) {
3428 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3430 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3431 Op.setModifiers(Mods);
3438 return parseRegOrImmWithFPInputMods(
Operands,
false);
3442 return parseRegOrImmWithIntInputMods(
Operands,
false);
3446 auto Loc = getLoc();
3447 if (trySkipId(
"off")) {
3448 Operands.push_back(AMDGPUOperand::CreateImm(
this, 0, Loc,
3449 AMDGPUOperand::ImmTyOff,
false));
3456 std::unique_ptr<AMDGPUOperand>
Reg = parseRegister();
3458 Operands.push_back(std::move(Reg));
3465unsigned AMDGPUAsmParser::checkTargetMatchPredicate(
MCInst &Inst) {
3472 return Match_InvalidOperand;
3474 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3475 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3480 if (!
Op.isImm() ||
Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3481 return Match_InvalidOperand;
3485 return Match_Success;
3489 static const unsigned Variants[] = {
3500 if (isForcedDPP() && isForcedVOP3()) {
3504 if (getForcedEncodingSize() == 32) {
3509 if (isForcedVOP3()) {
3514 if (isForcedSDWA()) {
3520 if (isForcedDPP()) {
3528StringRef AMDGPUAsmParser::getMatchedVariantName()
const {
3529 if (isForcedDPP() && isForcedVOP3())
3532 if (getForcedEncodingSize() == 32)
3547unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(
const MCInst &Inst)
const {
3551 case AMDGPU::FLAT_SCR:
3553 case AMDGPU::VCC_LO:
3554 case AMDGPU::VCC_HI:
3561 return AMDGPU::NoRegister;
3568bool AMDGPUAsmParser::isInlineConstant(
const MCInst &Inst,
3569 unsigned OpIdx)
const {
3579 int64_t Val = MO.
getImm();
3628unsigned AMDGPUAsmParser::getConstantBusLimit(
unsigned Opcode)
const {
3634 case AMDGPU::V_LSHLREV_B64_e64:
3635 case AMDGPU::V_LSHLREV_B64_gfx10:
3636 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3637 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3638 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3639 case AMDGPU::V_LSHRREV_B64_e64:
3640 case AMDGPU::V_LSHRREV_B64_gfx10:
3641 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3642 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3643 case AMDGPU::V_ASHRREV_I64_e64:
3644 case AMDGPU::V_ASHRREV_I64_gfx10:
3645 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3646 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3647 case AMDGPU::V_LSHL_B64_e64:
3648 case AMDGPU::V_LSHR_B64_e64:
3649 case AMDGPU::V_ASHR_I64_e64:
3662 bool AddMandatoryLiterals =
false) {
3668 int16_t ImmDeferredIdx =
3685bool AMDGPUAsmParser::usesConstantBus(
const MCInst &Inst,
unsigned OpIdx) {
3688 return !isInlineConstant(Inst, OpIdx);
3689 }
else if (MO.
isReg()) {
3696 return isSGPR(PReg,
TRI) && PReg != SGPR_NULL;
3708 const unsigned Opcode = Inst.
getOpcode();
3709 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3712 if (!LaneSelOp.
isReg())
3715 return LaneSelReg ==
M0 || LaneSelReg == M0_gfxpre11;
3718bool AMDGPUAsmParser::validateConstantBusLimitations(
3720 const unsigned Opcode = Inst.
getOpcode();
3722 unsigned LastSGPR = AMDGPU::NoRegister;
3723 unsigned ConstantBusUseCount = 0;
3724 unsigned NumLiterals = 0;
3725 unsigned LiteralSize;
3727 if (!(
Desc.TSFlags &
3743 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3744 if (SGPRUsed != AMDGPU::NoRegister) {
3745 SGPRsUsed.
insert(SGPRUsed);
3746 ++ConstantBusUseCount;
3751 for (
int OpIdx : OpIndices) {
3756 if (usesConstantBus(Inst, OpIdx)) {
3765 if (SGPRsUsed.
insert(LastSGPR).second) {
3766 ++ConstantBusUseCount;
3786 if (NumLiterals == 0) {
3789 }
else if (LiteralSize !=
Size) {
3795 ConstantBusUseCount += NumLiterals;
3797 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3803 Error(Loc,
"invalid operand (violates constant bus restrictions)");
3807bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3810 const unsigned Opcode = Inst.
getOpcode();
3816 auto getVRegIdx = [&](
unsigned,
unsigned OperandIdx) {
3824 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3827 auto InvalidCompOprIdx =
3828 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3829 if (!InvalidCompOprIdx)
3832 auto CompOprIdx = *InvalidCompOprIdx;
3834 std::max(InstInfo[
VOPD::X].getIndexInParsedOperands(CompOprIdx),
3835 InstInfo[
VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3838 auto Loc = ((AMDGPUOperand &)*
Operands[ParsedIdx]).getStartLoc();
3839 if (CompOprIdx == VOPD::Component::DST) {
3840 Error(Loc,
"one dst register must be even and the other odd");
3842 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3844 " operands must use different VGPR banks");
3850bool AMDGPUAsmParser::validateIntClampSupported(
const MCInst &Inst) {
3867bool AMDGPUAsmParser::validateMIMGDataSize(
const MCInst &Inst,
3868 const SMLoc &IDLoc) {
3886 unsigned TFESize = (TFEIdx != -1 && Inst.
getOperand(TFEIdx).
getImm()) ? 1 : 0;
3891 bool IsPackedD16 =
false;
3896 IsPackedD16 = D16Idx >= 0;
3898 DataSize = (DataSize + 1) / 2;
3901 if ((VDataSize / 4) == DataSize + TFESize)
3906 Modifiers = IsPackedD16 ?
"dmask and d16" :
"dmask";
3908 Modifiers = IsPackedD16 ?
"dmask, d16 and tfe" :
"dmask and tfe";
3910 Error(IDLoc,
Twine(
"image data size does not match ") + Modifiers);
3914bool AMDGPUAsmParser::validateMIMGAddrSize(
const MCInst &Inst,
3915 const SMLoc &IDLoc) {
3928 : AMDGPU::OpName::rsrc;
3935 assert(SrsrcIdx > VAddr0Idx);
3938 if (BaseOpcode->
BVH) {
3939 if (IsA16 == BaseOpcode->
A16)
3941 Error(IDLoc,
"image address size does not match a16");
3947 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3948 unsigned ActualAddrSize =
3949 IsNSA ? SrsrcIdx - VAddr0Idx
3952 unsigned ExpectedAddrSize =
3956 if (hasPartialNSAEncoding() &&
3959 int VAddrLastIdx = SrsrcIdx - 1;
3960 unsigned VAddrLastSize =
3963 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3966 if (ExpectedAddrSize > 12)
3967 ExpectedAddrSize = 16;
3972 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3976 if (ActualAddrSize == ExpectedAddrSize)
3979 Error(IDLoc,
"image address size does not match dim and a16");
3983bool AMDGPUAsmParser::validateMIMGAtomicDMask(
const MCInst &Inst) {
3990 if (!
Desc.mayLoad() || !
Desc.mayStore())
4000 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4003bool AMDGPUAsmParser::validateMIMGGatherDMask(
const MCInst &Inst) {
4019 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4022bool AMDGPUAsmParser::validateMIMGMSAA(
const MCInst &Inst) {
4033 if (!BaseOpcode->
MSAA)
4042 return DimInfo->
MSAA;
4048 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4049 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4050 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4060bool AMDGPUAsmParser::validateMovrels(
const MCInst &Inst,
4084 Error(ErrLoc,
"source operand must be a VGPR");
4088bool AMDGPUAsmParser::validateMAIAccWrite(
const MCInst &Inst,
4093 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4107 "source operand must be either a VGPR or an inline constant");
4114bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
4120 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4127 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
4129 "inline constants are not allowed for this operand");
4136bool AMDGPUAsmParser::validateMFMA(
const MCInst &Inst,
4154 if (Src2Reg == DstReg)
4158 if (
TRI->getRegClass(
Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4161 if (
TRI->regsOverlap(Src2Reg, DstReg)) {
4163 "source 2 operand must not partially overlap with dst");
4170bool AMDGPUAsmParser::validateDivScale(
const MCInst &Inst) {
4174 case V_DIV_SCALE_F32_gfx6_gfx7:
4175 case V_DIV_SCALE_F32_vi:
4176 case V_DIV_SCALE_F32_gfx10:
4177 case V_DIV_SCALE_F64_gfx6_gfx7:
4178 case V_DIV_SCALE_F64_vi:
4179 case V_DIV_SCALE_F64_gfx10:
4185 for (
auto Name : {AMDGPU::OpName::src0_modifiers,
4186 AMDGPU::OpName::src2_modifiers,
4187 AMDGPU::OpName::src2_modifiers}) {
4198bool AMDGPUAsmParser::validateMIMGD16(
const MCInst &Inst) {
4218 case AMDGPU::V_SUBREV_F32_e32:
4219 case AMDGPU::V_SUBREV_F32_e64:
4220 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4221 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4222 case AMDGPU::V_SUBREV_F32_e32_vi:
4223 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4224 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4225 case AMDGPU::V_SUBREV_F32_e64_vi:
4227 case AMDGPU::V_SUBREV_CO_U32_e32:
4228 case AMDGPU::V_SUBREV_CO_U32_e64:
4229 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4230 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4232 case AMDGPU::V_SUBBREV_U32_e32:
4233 case AMDGPU::V_SUBBREV_U32_e64:
4234 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4235 case AMDGPU::V_SUBBREV_U32_e32_vi:
4236 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4237 case AMDGPU::V_SUBBREV_U32_e64_vi:
4239 case AMDGPU::V_SUBREV_U32_e32:
4240 case AMDGPU::V_SUBREV_U32_e64:
4241 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4242 case AMDGPU::V_SUBREV_U32_e32_vi:
4243 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4244 case AMDGPU::V_SUBREV_U32_e64_vi:
4246 case AMDGPU::V_SUBREV_F16_e32:
4247 case AMDGPU::V_SUBREV_F16_e64:
4248 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4249 case AMDGPU::V_SUBREV_F16_e32_vi:
4250 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4251 case AMDGPU::V_SUBREV_F16_e64_vi:
4253 case AMDGPU::V_SUBREV_U16_e32:
4254 case AMDGPU::V_SUBREV_U16_e64:
4255 case AMDGPU::V_SUBREV_U16_e32_vi:
4256 case AMDGPU::V_SUBREV_U16_e64_vi:
4258 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4259 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4260 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4262 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4263 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4265 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4266 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4268 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4269 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4271 case AMDGPU::V_LSHRREV_B32_e32:
4272 case AMDGPU::V_LSHRREV_B32_e64:
4273 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4274 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4275 case AMDGPU::V_LSHRREV_B32_e32_vi:
4276 case AMDGPU::V_LSHRREV_B32_e64_vi:
4277 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4278 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4280 case AMDGPU::V_ASHRREV_I32_e32:
4281 case AMDGPU::V_ASHRREV_I32_e64:
4282 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4283 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4284 case AMDGPU::V_ASHRREV_I32_e32_vi:
4285 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4286 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4287 case AMDGPU::V_ASHRREV_I32_e64_vi:
4289 case AMDGPU::V_LSHLREV_B32_e32:
4290 case AMDGPU::V_LSHLREV_B32_e64:
4291 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4292 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4293 case AMDGPU::V_LSHLREV_B32_e32_vi:
4294 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4295 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4296 case AMDGPU::V_LSHLREV_B32_e64_vi:
4298 case AMDGPU::V_LSHLREV_B16_e32:
4299 case AMDGPU::V_LSHLREV_B16_e64:
4300 case AMDGPU::V_LSHLREV_B16_e32_vi:
4301 case AMDGPU::V_LSHLREV_B16_e64_vi:
4302 case AMDGPU::V_LSHLREV_B16_gfx10:
4304 case AMDGPU::V_LSHRREV_B16_e32:
4305 case AMDGPU::V_LSHRREV_B16_e64:
4306 case AMDGPU::V_LSHRREV_B16_e32_vi:
4307 case AMDGPU::V_LSHRREV_B16_e64_vi:
4308 case AMDGPU::V_LSHRREV_B16_gfx10:
4310 case AMDGPU::V_ASHRREV_I16_e32:
4311 case AMDGPU::V_ASHRREV_I16_e64:
4312 case AMDGPU::V_ASHRREV_I16_e32_vi:
4313 case AMDGPU::V_ASHRREV_I16_e64_vi:
4314 case AMDGPU::V_ASHRREV_I16_gfx10:
4316 case AMDGPU::V_LSHLREV_B64_e64:
4317 case AMDGPU::V_LSHLREV_B64_gfx10:
4318 case AMDGPU::V_LSHLREV_B64_vi:
4320 case AMDGPU::V_LSHRREV_B64_e64:
4321 case AMDGPU::V_LSHRREV_B64_gfx10:
4322 case AMDGPU::V_LSHRREV_B64_vi:
4324 case AMDGPU::V_ASHRREV_I64_e64:
4325 case AMDGPU::V_ASHRREV_I64_gfx10:
4326 case AMDGPU::V_ASHRREV_I64_vi:
4328 case AMDGPU::V_PK_LSHLREV_B16:
4329 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4330 case AMDGPU::V_PK_LSHLREV_B16_vi:
4332 case AMDGPU::V_PK_LSHRREV_B16:
4333 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4334 case AMDGPU::V_PK_LSHRREV_B16_vi:
4335 case AMDGPU::V_PK_ASHRREV_I16:
4336 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4337 case AMDGPU::V_PK_ASHRREV_I16_vi:
4344std::optional<StringRef>
4345AMDGPUAsmParser::validateLdsDirect(
const MCInst &Inst) {
4347 using namespace SIInstrFlags;
4348 const unsigned Opcode = Inst.
getOpcode();
4354 if ((
Desc.TSFlags & Enc) == 0)
4355 return std::nullopt;
4357 for (
auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4362 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4365 return StringRef(
"lds_direct is not supported on this GPU");
4368 return StringRef(
"lds_direct cannot be used with this instruction");
4370 if (SrcName != OpName::src0)
4371 return StringRef(
"lds_direct may be used as src0 only");
4375 return std::nullopt;
4379 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4380 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4381 if (
Op.isFlatOffset())
4382 return Op.getStartLoc();
4387bool AMDGPUAsmParser::validateOffset(
const MCInst &Inst,
4396 return validateFlatOffset(Inst,
Operands);
4399 return validateSMEMOffset(Inst,
Operands);
4404 const unsigned OffsetSize = 24;
4405 if (!
isIntN(OffsetSize,
Op.getImm())) {
4407 Twine(
"expected a ") +
Twine(OffsetSize) +
"-bit signed offset");
4411 const unsigned OffsetSize = 16;
4412 if (!
isUIntN(OffsetSize,
Op.getImm())) {
4414 Twine(
"expected a ") +
Twine(OffsetSize) +
"-bit unsigned offset");
4421bool AMDGPUAsmParser::validateFlatOffset(
const MCInst &Inst,
4432 if (!hasFlatOffsets() &&
Op.getImm() != 0) {
4434 "flat offset modifier is not supported on this GPU");
4441 bool AllowNegative =
4444 if (!
isIntN(OffsetSize,
Op.getImm()) || (!AllowNegative &&
Op.getImm() < 0)) {
4446 Twine(
"expected a ") +
4447 (AllowNegative ?
Twine(OffsetSize) +
"-bit signed offset"
4448 :
Twine(OffsetSize - 1) +
"-bit unsigned offset"));
4457 for (
unsigned i = 2, e =
Operands.size(); i != e; ++i) {
4458 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4459 if (
Op.isSMEMOffset() ||
Op.isSMEMOffsetMod())
4460 return Op.getStartLoc();
4465bool AMDGPUAsmParser::validateSMEMOffset(
const MCInst &Inst,
4491 : (
isVI() || IsBuffer) ?
"expected a 20-bit unsigned offset"
4492 :
"expected a 21-bit signed offset");
4497bool AMDGPUAsmParser::validateSOPLiteral(
const MCInst &Inst)
const {
4506 const int OpIndices[] = { Src0Idx, Src1Idx };
4508 unsigned NumExprs = 0;
4509 unsigned NumLiterals = 0;
4512 for (
int OpIdx : OpIndices) {
4513 if (OpIdx == -1)
break;
4518 if (MO.
isImm() && !isInlineConstant(Inst, OpIdx)) {
4520 if (NumLiterals == 0 || LiteralValue !=
Value) {
4524 }
else if (MO.
isExpr()) {
4530 return NumLiterals + NumExprs <= 1;
4533bool AMDGPUAsmParser::validateOpSel(
const MCInst &Inst) {
4547 if (OpSelIdx != -1) {
4552 if (OpSelHiIdx != -1) {
4570bool AMDGPUAsmParser::validateNeg(
const MCInst &Inst,
int OpName) {
4595 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4596 AMDGPU::OpName::src1_modifiers,
4597 AMDGPU::OpName::src2_modifiers};
4599 for (
unsigned i = 0; i < 3; ++i) {
4609bool AMDGPUAsmParser::validateDPP(
const MCInst &Inst,
4613 if (DppCtrlIdx >= 0) {
4620 Error(S,
"DP ALU dpp only supports row_newbcast");
4626 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4636 Error(S,
"invalid operand for instruction");
4641 "src1 immediate operand invalid for instruction");
4651bool AMDGPUAsmParser::validateVccOperand(
unsigned Reg)
const {
4652 auto FB = getFeatureBits();
4653 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4654 (FB[AMDGPU::FeatureWavefrontSize32] &&
Reg == AMDGPU::VCC_LO);
4658bool AMDGPUAsmParser::validateVOPLiteral(
const MCInst &Inst,
4664 !HasMandatoryLiteral && !
isVOPD(Opcode))
4669 unsigned NumExprs = 0;
4670 unsigned NumLiterals = 0;
4673 for (
int OpIdx : OpIndices) {
4683 if (MO.
isImm() && !isInlineConstant(Inst, OpIdx)) {
4689 if (!IsValid32Op && !isInt<32>(
Value) && !isUInt<32>(
Value)) {
4690 Error(getLitLoc(
Operands),
"invalid operand for instruction");
4694 if (IsFP64 && IsValid32Op)
4697 if (NumLiterals == 0 || LiteralValue !=
Value) {
4701 }
else if (MO.
isExpr()) {
4705 NumLiterals += NumExprs;
4710 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4711 Error(getLitLoc(
Operands),
"literal operands are not supported");
4715 if (NumLiterals > 1) {
4716 Error(getLitLoc(
Operands,
true),
"only one unique literal operand is allowed");
4734 unsigned Sub =
MRI->getSubReg(
Op.getReg(), AMDGPU::sub0);
4735 auto Reg = Sub ? Sub :
Op.getReg();
4737 return AGPR32.
contains(Reg) ? 1 : 0;
4740bool AMDGPUAsmParser::validateAGPRLdSt(
const MCInst &Inst)
const {
4748 : AMDGPU::OpName::vdata;
4756 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4760 auto FB = getFeatureBits();
4761 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4762 if (DataAreg < 0 || DstAreg < 0)
4764 return DstAreg == DataAreg;
4767 return DstAreg < 1 && DataAreg < 1;
4770bool AMDGPUAsmParser::validateVGPRAlign(
const MCInst &Inst)
const {
4771 auto FB = getFeatureBits();
4772 if (!FB[AMDGPU::FeatureGFX90AInsts])
4783 unsigned Sub =
MRI->getSubReg(
Op.getReg(), AMDGPU::sub0);
4787 if (VGPR32.
contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4789 if (AGPR32.
contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4797 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4798 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4800 return Op.getStartLoc();
4805bool AMDGPUAsmParser::validateBLGP(
const MCInst &Inst,
4815 auto FB = getFeatureBits();
4816 bool UsesNeg =
false;
4817 if (FB[AMDGPU::FeatureGFX940Insts]) {
4819 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4820 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4821 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4822 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4827 if (IsNeg == UsesNeg)
4831 UsesNeg ?
"invalid modifier: blgp is not supported"
4832 :
"invalid modifier: neg is not supported");
4837bool AMDGPUAsmParser::validateWaitCnt(
const MCInst &Inst,
4843 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4844 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4845 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4846 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4852 if (Reg == AMDGPU::SGPR_NULL)
4856 Error(RegLoc,
"src0 must be null");
4860bool AMDGPUAsmParser::validateDS(
const MCInst &Inst,
4866 return validateGWS(Inst,
Operands);
4877 Error(S,
"gds modifier is not supported on this GPU");
4885bool AMDGPUAsmParser::validateGWS(
const MCInst &Inst,
4887 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4891 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4892 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4901 auto RegIdx =
Reg - (VGPR32.
contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4904 Error(RegLoc,
"vgpr must be even aligned");
4911bool AMDGPUAsmParser::validateCoherencyBits(
const MCInst &Inst,
4913 const SMLoc &IDLoc) {
4915 AMDGPU::OpName::cpol);
4922 return validateTHAndScopeBits(Inst,
Operands, CPol);
4928 Error(S,
"cache policy is not supported for SMRD instructions");
4932 Error(IDLoc,
"invalid cache policy for SMEM instruction");
4941 if (!(TSFlags & AllowSCCModifier)) {
4946 "scc modifier is not supported for this instruction on this GPU");
4957 :
"instruction must use glc");
4965 &CStr.data()[CStr.find(
isGFX940() ?
"sc0" :
"glc")]);
4967 :
"instruction must not use glc");
4975bool AMDGPUAsmParser::validateTHAndScopeBits(
const MCInst &Inst,
4977 const unsigned CPol) {
4981 const unsigned Opcode = Inst.
getOpcode();
4993 return PrintError(
"instruction must use th:TH_ATOMIC_RETURN");
5001 return PrintError(
"invalid th value for SMEM instruction");
5008 return PrintError(
"scope and th combination is not valid");
5017 return PrintError(
"invalid th value for atomic instructions");
5018 }
else if (IsStore) {
5020 return PrintError(
"invalid th value for store instructions");
5023 return PrintError(
"invalid th value for load instructions");
5033 if (!Operand->isReg())
5035 unsigned Reg = Operand->getReg();
5036 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5038 "execz and vccz are not supported on this GPU");
5045bool AMDGPUAsmParser::validateTFE(
const MCInst &Inst,
5048 if (
Desc.mayStore() &&
5052 Error(Loc,
"TFE modifier has no meaning for store instructions");
5060bool AMDGPUAsmParser::validateInstruction(
const MCInst &Inst,
5063 if (
auto ErrMsg = validateLdsDirect(Inst)) {
5067 if (!validateSOPLiteral(Inst)) {
5069 "only one unique literal operand is allowed");
5072 if (!validateVOPLiteral(Inst,
Operands)) {
5075 if (!validateConstantBusLimitations(Inst,
Operands)) {
5078 if (!validateVOPDRegBankConstraints(Inst,
Operands)) {
5081 if (!validateIntClampSupported(Inst)) {
5083 "integer clamping is not supported on this GPU");
5086 if (!validateOpSel(Inst)) {
5088 "invalid op_sel operand");
5091 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5093 "invalid neg_lo operand");
5096 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5098 "invalid neg_hi operand");
5101 if (!validateDPP(Inst,
Operands)) {
5105 if (!validateMIMGD16(Inst)) {
5107 "d16 modifier is not supported on this GPU");
5110 if (!validateMIMGMSAA(Inst)) {
5112 "invalid dim; must be MSAA type");
5115 if (!validateMIMGDataSize(Inst, IDLoc)) {
5118 if (!validateMIMGAddrSize(Inst, IDLoc))
5120 if (!validateMIMGAtomicDMask(Inst)) {
5122 "invalid atomic image dmask");
5125 if (!validateMIMGGatherDMask(Inst)) {
5127 "invalid image_gather dmask: only one bit must be set");
5130 if (!validateMovrels(Inst,
Operands)) {
5133 if (!validateOffset(Inst,
Operands)) {
5136 if (!validateMAIAccWrite(Inst,
Operands)) {
5139 if (!validateMAISrc2(Inst,
Operands)) {
5142 if (!validateMFMA(Inst,
Operands)) {
5145 if (!validateCoherencyBits(Inst,
Operands, IDLoc)) {
5149 if (!validateAGPRLdSt(Inst)) {
5150 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5151 ?
"invalid register class: data and dst should be all VGPR or AGPR"
5152 :
"invalid register class: agpr loads and stores not supported on this GPU"
5156 if (!validateVGPRAlign(Inst)) {
5158 "invalid register class: vgpr tuples must be 64 bit aligned");
5165 if (!validateBLGP(Inst,
Operands)) {
5169 if (!validateDivScale(Inst)) {
5170 Error(IDLoc,
"ABS not allowed in VOP3B instructions");
5173 if (!validateWaitCnt(Inst,
Operands)) {
5176 if (!validateExeczVcczOperands(
Operands)) {
5179 if (!validateTFE(Inst,
Operands)) {
5188 unsigned VariantID = 0);
5192 unsigned VariantID);
5194bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
5199bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
5202 for (
auto Variant : Variants) {
5210bool AMDGPUAsmParser::checkUnsupportedInstruction(
StringRef Mnemo,
5211 const SMLoc &IDLoc) {
5212 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5215 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5220 getParser().clearPendingErrors();
5224 StringRef VariantName = getMatchedVariantName();
5225 if (!VariantName.
empty() && isSupportedMnemo(Mnemo, FBS)) {
5228 " variant of this instruction is not supported"));
5232 if (
isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5233 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5236 FeaturesWS32.
flip(AMDGPU::FeatureWavefrontSize64)
5237 .
flip(AMDGPU::FeatureWavefrontSize32);
5239 ComputeAvailableFeatures(FeaturesWS32);
5241 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5242 return Error(IDLoc,
"instruction requires wavesize=32");
5247 return Error(IDLoc,
"instruction not supported on this GPU");
5252 return Error(IDLoc,
"invalid instruction" + Suggestion);
5258 const auto &
Op = ((AMDGPUOperand &)*
Operands[InvalidOprIdx]);
5259 if (
Op.isToken() && InvalidOprIdx > 1) {
5260 const auto &PrevOp = ((AMDGPUOperand &)*
Operands[InvalidOprIdx - 1]);
5261 return PrevOp.isToken() && PrevOp.getToken() ==
"::";
5266bool AMDGPUAsmParser::MatchAndEmitInstruction(
SMLoc IDLoc,
unsigned &Opcode,
5270 bool MatchingInlineAsm) {
5272 unsigned Result = Match_Success;
5273 for (
auto Variant : getMatchedVariants()) {
5275 auto R = MatchInstructionImpl(
Operands, Inst, EI, MatchingInlineAsm,
5280 if ((R == Match_Success) ||
5281 (R == Match_PreferE32) ||
5282 (R == Match_MissingFeature && Result != Match_PreferE32) ||
5283 (R == Match_InvalidOperand && Result != Match_MissingFeature
5284 && Result != Match_PreferE32) ||
5285 (R == Match_MnemonicFail && Result != Match_InvalidOperand
5286 && Result != Match_MissingFeature
5287 && Result != Match_PreferE32)) {
5291 if (R == Match_Success)
5295 if (Result == Match_Success) {
5296 if (!validateInstruction(Inst, IDLoc,
Operands)) {
5305 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5311 case Match_MissingFeature:
5315 return Error(IDLoc,
"operands are not valid for this GPU or mode");
5317 case Match_InvalidOperand: {
5318 SMLoc ErrorLoc = IDLoc;
5321 return Error(IDLoc,
"too few operands for instruction");
5324 if (ErrorLoc ==
SMLoc())
5328 return Error(ErrorLoc,
"invalid VOPDY instruction");
5330 return Error(ErrorLoc,
"invalid operand for instruction");
5333 case Match_PreferE32:
5334 return Error(IDLoc,
"internal error: instruction without _e64 suffix "
5335 "should be encoded as e32");
5336 case Match_MnemonicFail:
5342bool AMDGPUAsmParser::ParseAsAbsoluteExpression(
uint32_t &Ret) {
5347 if (getParser().parseAbsoluteExpression(Tmp)) {
5354bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5356 return TokError(
"directive only supported for amdgcn architecture");
5358 std::string TargetIDDirective;
5359 SMLoc TargetStart = getTok().getLoc();
5360 if (getParser().parseEscapedString(TargetIDDirective))
5364 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
5365 return getParser().Error(TargetRange.
Start,
5366 (
Twine(
".amdgcn_target directive's target id ") +
5367 Twine(TargetIDDirective) +
5368 Twine(
" does not match the specified target id ") +
5369 Twine(getTargetStreamer().getTargetID()->
toString())).str());
5374bool AMDGPUAsmParser::OutOfRangeError(
SMRange Range) {
5375 return Error(
Range.Start,
"value out of range", Range);
5378bool AMDGPUAsmParser::calculateGPRBlocks(
5379 const FeatureBitset &Features,
bool VCCUsed,
bool FlatScrUsed,
5380 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5381 unsigned NextFreeVGPR,
SMRange VGPRRange,
unsigned NextFreeSGPR,
5382 SMRange SGPRRange,
unsigned &VGPRBlocks,
unsigned &SGPRBlocks) {
5393 unsigned MaxAddressableNumSGPRs =
5396 if (
Version.Major >= 8 && !Features.
test(FeatureSGPRInitBug) &&
5397 NumSGPRs > MaxAddressableNumSGPRs)
5398 return OutOfRangeError(SGPRRange);
5403 if ((
Version.Major <= 7 || Features.
test(FeatureSGPRInitBug)) &&
5404 NumSGPRs > MaxAddressableNumSGPRs)
5405 return OutOfRangeError(SGPRRange);
5407 if (Features.
test(FeatureSGPRInitBug))
5412 EnableWavefrontSize32);
5418bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5420 return TokError(
"directive only supported for amdgcn architecture");
5423 return TokError(
"directive only supported for amdhsa OS");
5426 if (getParser().parseIdentifier(KernelName))
5431 &getSTI(), getContext());
5447 unsigned ImpliedUserSGPRCount = 0;
5451 std::optional<unsigned> ExplicitUserSGPRCount;
5452 bool ReserveVCC =
true;
5453 bool ReserveFlatScr =
true;
5454 std::optional<bool> EnableWavefrontSize32;
5460 SMRange IDRange = getTok().getLocRange();
5461 if (!parseId(
ID,
"expected .amdhsa_ directive or .end_amdhsa_kernel"))
5464 if (
ID ==
".end_amdhsa_kernel")
5468 return TokError(
".amdhsa_ directives cannot be repeated");
5470 SMLoc ValStart = getLoc();
5472 if (getParser().parseExpression(ExprVal))
5474 SMLoc ValEnd = getLoc();
5479 bool EvaluatableExpr;
5480 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5482 return OutOfRangeError(ValRange);
5486#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5487 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5488 return OutOfRangeError(RANGE); \
5489 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5494#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5496 return Error(IDRange.Start, "directive should have resolvable expression", \
5499 if (
ID ==
".amdhsa_group_segment_fixed_size") {
5502 return OutOfRangeError(ValRange);
5504 }
else if (
ID ==
".amdhsa_private_segment_fixed_size") {
5507 return OutOfRangeError(ValRange);
5509 }
else if (
ID ==
".amdhsa_kernarg_size") {
5511 return OutOfRangeError(ValRange);
5513 }
else if (
ID ==
".amdhsa_user_sgpr_count") {
5515 ExplicitUserSGPRCount = Val;
5516 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
5520 "directive is not supported with architected flat scratch",
5523 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5526 ImpliedUserSGPRCount += 4;
5527 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
5530 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5533 return OutOfRangeError(ValRange);
5537 ImpliedUserSGPRCount += Val;
5538 PreloadLength = Val;
5540 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
5543 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5546 return OutOfRangeError(ValRange);
5550 PreloadOffset = Val;
5551 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
5554 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5557 ImpliedUserSGPRCount += 2;
5558 }
else if (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
5561 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5564 ImpliedUserSGPRCount += 2;
5565 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
5568 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5571 ImpliedUserSGPRCount += 2;
5572 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
5575 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5578 ImpliedUserSGPRCount += 2;
5579 }
else if (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
5582 "directive is not supported with architected flat scratch",
5586 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5589 ImpliedUserSGPRCount += 2;
5590 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
5593 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5596 ImpliedUserSGPRCount += 1;
5597 }
else if (
ID ==
".amdhsa_wavefront_size32") {
5599 if (IVersion.
Major < 10)
5600 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5601 EnableWavefrontSize32 = Val;
5603 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5605 }
else if (
ID ==
".amdhsa_uses_dynamic_stack") {
5607 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5609 }
else if (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5612 "directive is not supported with architected flat scratch",
5615 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5617 }
else if (
ID ==
".amdhsa_enable_private_segment") {
5621 "directive is not supported without architected flat scratch",
5624 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5626 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
5628 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5630 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
5632 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5634 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
5636 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5638 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
5640 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5642 }
else if (
ID ==
".amdhsa_system_vgpr_workitem_id") {
5644 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5646 }
else if (
ID ==
".amdhsa_next_free_vgpr") {
5648 VGPRRange = ValRange;
5650 }
else if (
ID ==
".amdhsa_next_free_sgpr") {
5652 SGPRRange = ValRange;
5654 }
else if (
ID ==
".amdhsa_accum_offset") {
5656 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5659 }
else if (
ID ==
".amdhsa_reserve_vcc") {
5661 if (!isUInt<1>(Val))
5662 return OutOfRangeError(ValRange);
5664 }
else if (
ID ==
".amdhsa_reserve_flat_scratch") {
5666 if (IVersion.
Major < 7)
5667 return Error(IDRange.
Start,
"directive requires gfx7+", IDRange);
5670 "directive is not supported with architected flat scratch",
5672 if (!isUInt<1>(Val))
5673 return OutOfRangeError(ValRange);
5674 ReserveFlatScr = Val;
5675 }
else if (
ID ==
".amdhsa_reserve_xnack_mask") {
5676 if (IVersion.
Major < 8)
5677 return Error(IDRange.
Start,
"directive requires gfx8+", IDRange);
5678 if (!isUInt<1>(Val))
5679 return OutOfRangeError(ValRange);
5680 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5681 return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
5683 }
else if (
ID ==
".amdhsa_float_round_mode_32") {
5685 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5687 }
else if (
ID ==
".amdhsa_float_round_mode_16_64") {
5689 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5691 }
else if (
ID ==
".amdhsa_float_denorm_mode_32") {
5693 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5695 }
else if (
ID ==
".amdhsa_float_denorm_mode_16_64") {
5697 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5699 }
else if (
ID ==
".amdhsa_dx10_clamp") {
5700 if (IVersion.
Major >= 12)
5701 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
5703 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5705 }
else if (
ID ==
".amdhsa_ieee_mode") {
5706 if (IVersion.
Major >= 12)
5707 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
5709 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5711 }
else if (
ID ==
".amdhsa_fp16_overflow") {
5712 if (IVersion.
Major < 9)
5713 return Error(IDRange.
Start,
"directive requires gfx9+", IDRange);
5715 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5717 }
else if (
ID ==
".amdhsa_tg_split") {
5719 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5722 }
else if (
ID ==
".amdhsa_workgroup_processor_mode") {
5723 if (IVersion.
Major < 10)
5724 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5726 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5728 }
else if (
ID ==
".amdhsa_memory_ordered") {
5729 if (IVersion.
Major < 10)
5730 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5732 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5734 }
else if (
ID ==
".amdhsa_forward_progress") {
5735 if (IVersion.
Major < 10)
5736 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5738 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5740 }
else if (
ID ==
".amdhsa_shared_vgpr_count") {
5742 if (IVersion.
Major < 10 || IVersion.
Major >= 12)
5743 return Error(IDRange.
Start,
"directive requires gfx10 or gfx11",
5745 SharedVGPRCount = Val;
5747 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5749 }
else if (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
5752 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5754 }
else if (
ID ==
".amdhsa_exception_fp_denorm_src") {
5756 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5758 }
else if (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
5761 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5763 }
else if (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
5765 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5767 }
else if (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
5769 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5771 }
else if (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
5773 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5775 }
else if (
ID ==
".amdhsa_exception_int_div_zero") {
5777 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5779 }
else if (
ID ==
".amdhsa_round_robin_scheduling") {
5780 if (IVersion.
Major < 12)
5781 return Error(IDRange.
Start,
"directive requires gfx12+", IDRange);
5783 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5786 return Error(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
5789#undef PARSE_BITS_ENTRY
5792 if (!Seen.
contains(
".amdhsa_next_free_vgpr"))
5793 return TokError(
".amdhsa_next_free_vgpr directive is required");
5795 if (!Seen.
contains(
".amdhsa_next_free_sgpr"))
5796 return TokError(
".amdhsa_next_free_sgpr directive is required");
5798 unsigned VGPRBlocks;
5799 unsigned SGPRBlocks;
5800 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5801 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5802 EnableWavefrontSize32, NextFreeVGPR,
5803 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5807 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5809 return OutOfRangeError(VGPRRange);
5812 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5813 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5815 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5817 return OutOfRangeError(SGPRRange);
5820 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5821 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5823 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5824 return TokError(
"amdgpu_user_sgpr_count smaller than than implied by "
5825 "enabled user SGPRs");
5827 unsigned UserSGPRCount =
5828 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5830 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5831 return TokError(
"too many user SGPRs enabled");
5834 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5835 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5839 return TokError(
"Kernarg size should be resolvable");
5841 if (PreloadLength && kernarg_size &&
5842 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5843 return TokError(
"Kernarg preload length + offset is larger than the "
5844 "kernarg segment size");
5847 if (!Seen.
contains(
".amdhsa_accum_offset"))
5848 return TokError(
".amdhsa_accum_offset directive is required");
5849 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5850 return TokError(
"accum_offset should be in range [4..256] in "
5853 return TokError(
"accum_offset exceeds total VGPR allocation");
5857 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5858 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
5861 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
5863 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5864 return TokError(
"shared_vgpr_count directive not valid on "
5865 "wavefront size 32");
5867 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5868 return TokError(
"shared_vgpr_count*2 + "
5869 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5874 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5875 NextFreeVGPR, NextFreeSGPR,
5876 ReserveVCC, ReserveFlatScr);
5880bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5882 if (ParseAsAbsoluteExpression(Version))
5885 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5889bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(
StringRef ID,
5893 if (
ID ==
"max_scratch_backing_memory_byte_size") {
5894 Parser.eatToEndOfStatement();
5901 return TokError(Err.str());
5905 if (
ID ==
"enable_dx10_clamp") {
5908 return TokError(
"enable_dx10_clamp=1 is not allowed on GFX12+");
5911 if (
ID ==
"enable_ieee_mode") {
5914 return TokError(
"enable_ieee_mode=1 is not allowed on GFX12+");
5917 if (
ID ==
"enable_wavefront_size32") {
5920 return TokError(
"enable_wavefront_size32=1 is only allowed on GFX10+");
5921 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5922 return TokError(
"enable_wavefront_size32=1 requires +WavefrontSize32");
5924 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5925 return TokError(
"enable_wavefront_size32=0 requires +WavefrontSize64");
5929 if (
ID ==
"wavefront_size") {
5930 if (Header.wavefront_size == 5) {
5932 return TokError(
"wavefront_size=5 is only allowed on GFX10+");
5933 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5934 return TokError(
"wavefront_size=5 requires +WavefrontSize32");
5935 }
else if (Header.wavefront_size == 6) {
5936 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5937 return TokError(
"wavefront_size=6 requires +WavefrontSize64");
5941 if (
ID ==
"enable_wgp_mode") {
5944 return TokError(
"enable_wgp_mode=1 is only allowed on GFX10+");
5947 if (
ID ==
"enable_mem_ordered") {
5950 return TokError(
"enable_mem_ordered=1 is only allowed on GFX10+");
5953 if (
ID ==
"enable_fwd_progress") {
5956 return TokError(
"enable_fwd_progress=1 is only allowed on GFX10+");
5962bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5972 if (!parseId(
ID,
"expected value identifier or .end_amd_kernel_code_t"))
5975 if (
ID ==
".end_amd_kernel_code_t")
5978 if (ParseAMDKernelCodeTValue(
ID, Header))
5982 getTargetStreamer().EmitAMDKernelCodeT(Header);
5987bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5989 if (!parseId(KernelName,
"expected symbol name"))
5992 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5995 KernelScope.initialize(getContext());
5999bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6001 return Error(getLoc(),
6002 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6006 auto TargetIDDirective = getLexer().getTok().getStringContents();
6007 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
6008 return Error(getParser().getTok().getLoc(),
"target id must match options");
6010 getTargetStreamer().EmitISAVersion();
6016bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6019 std::string HSAMetadataString;
6024 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6025 return Error(getLoc(),
"invalid HSA metadata");
6032bool AMDGPUAsmParser::ParseToEndDirective(
const char *AssemblerDirectiveBegin,
6033 const char *AssemblerDirectiveEnd,
6034 std::string &CollectString) {
6038 getLexer().setSkipSpace(
false);
6040 bool FoundEnd =
false;
6043 CollectStream << getTokenStr();
6047 if (trySkipId(AssemblerDirectiveEnd)) {
6052 CollectStream << Parser.parseStringToEndOfStatement()
6053 << getContext().getAsmInfo()->getSeparatorString();
6055 Parser.eatToEndOfStatement();
6058 getLexer().setSkipSpace(
true);
6061 return TokError(
Twine(
"expected directive ") +
6062 Twine(AssemblerDirectiveEnd) +
Twine(
" not found"));
6065 CollectStream.flush();
6070bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6076 auto PALMetadata = getTargetStreamer().getPALMetadata();
6077 if (!PALMetadata->setFromString(
String))
6078 return Error(getLoc(),
"invalid PAL metadata");
6083bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6085 return Error(getLoc(),
6087 "not available on non-amdpal OSes")).str());
6090 auto PALMetadata = getTargetStreamer().getPALMetadata();
6091 PALMetadata->setLegacy();
6094 if (ParseAsAbsoluteExpression(Key)) {
6095 return TokError(
Twine(
"invalid value in ") +
6099 return TokError(
Twine(
"expected an even number of values in ") +
6102 if (ParseAsAbsoluteExpression(
Value)) {
6103 return TokError(
Twine(
"invalid value in ") +
6106 PALMetadata->setRegister(Key,
Value);
6115bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6116 if (getParser().checkForValidSection())
6120 SMLoc NameLoc = getLoc();
6121 if (getParser().parseIdentifier(
Name))
6122 return TokError(
"expected identifier in directive");
6125 if (getParser().parseComma())
6131 SMLoc SizeLoc = getLoc();
6132 if (getParser().parseAbsoluteExpression(
Size))
6135 return Error(SizeLoc,
"size must be non-negative");
6136 if (
Size > LocalMemorySize)
6137 return Error(SizeLoc,
"size is too large");
6139 int64_t Alignment = 4;
6141 SMLoc AlignLoc = getLoc();
6142 if (getParser().parseAbsoluteExpression(Alignment))
6145 return Error(AlignLoc,
"alignment must be a power of two");
6150 if (Alignment >= 1u << 31)
6151 return Error(AlignLoc,
"alignment is too large");
6157 Symbol->redefineIfPossible();
6158 if (!
Symbol->isUndefined())
6159 return Error(NameLoc,
"invalid symbol redefinition");
6161 getTargetStreamer().emitAMDGPULDS(Symbol,
Size,
Align(Alignment));
6165bool AMDGPUAsmParser::ParseDirective(
AsmToken DirectiveID) {
6169 if (IDVal ==
".amdhsa_kernel")
6170 return ParseDirectiveAMDHSAKernel();
6172 if (IDVal ==
".amdhsa_code_object_version")
6173 return ParseDirectiveAMDHSACodeObjectVersion();
6177 return ParseDirectiveHSAMetadata();
6179 if (IDVal ==
".amd_kernel_code_t")
6180 return ParseDirectiveAMDKernelCodeT();
6182 if (IDVal ==
".amdgpu_hsa_kernel")
6183 return ParseDirectiveAMDGPUHsaKernel();
6185 if (IDVal ==
".amd_amdgpu_isa")
6186 return ParseDirectiveISAVersion();
6190 Twine(
" directive is "
6191 "not available on non-amdhsa OSes"))
6196 if (IDVal ==
".amdgcn_target")
6197 return ParseDirectiveAMDGCNTarget();
6199 if (IDVal ==
".amdgpu_lds")
6200 return ParseDirectiveAMDGPULDS();
6203 return ParseDirectivePALMetadataBegin();
6206 return ParseDirectivePALMetadata();
6214 if (
MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6218 if (
MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6219 return hasSGPR104_SGPR105();
6222 case AMDGPU::SRC_SHARED_BASE_LO:
6223 case AMDGPU::SRC_SHARED_BASE:
6224 case AMDGPU::SRC_SHARED_LIMIT_LO:
6225 case AMDGPU::SRC_SHARED_LIMIT:
6226 case AMDGPU::SRC_PRIVATE_BASE_LO:
6227 case AMDGPU::SRC_PRIVATE_BASE:
6228 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6229 case AMDGPU::SRC_PRIVATE_LIMIT:
6231 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6234 case AMDGPU::TBA_LO:
6235 case AMDGPU::TBA_HI:
6237 case AMDGPU::TMA_LO:
6238 case AMDGPU::TMA_HI:
6240 case AMDGPU::XNACK_MASK:
6241 case AMDGPU::XNACK_MASK_LO:
6242 case AMDGPU::XNACK_MASK_HI:
6243 return (
isVI() ||
isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6244 case AMDGPU::SGPR_NULL:
6258 case AMDGPU::FLAT_SCR:
6259 case AMDGPU::FLAT_SCR_LO:
6260 case AMDGPU::FLAT_SCR_HI:
6269 if (
MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6270 return hasSGPR102_SGPR103();
6283 Res = MatchOperandParserImpl(
Operands, Mnemonic);
6295 SMLoc LBraceLoc = getLoc();
6300 auto Loc = getLoc();
6303 Error(Loc,
"expected a register");
6307 RBraceLoc = getLoc();
6312 "expected a comma or a closing square bracket"))
6316 if (
Operands.size() - Prefix > 1) {
6318 AMDGPUOperand::CreateToken(
this,
"[", LBraceLoc));
6319 Operands.push_back(AMDGPUOperand::CreateToken(
this,
"]", RBraceLoc));
6330 setForcedEncodingSize(0);
6331 setForcedDPP(
false);
6332 setForcedSDWA(
false);
6334 if (
Name.ends_with(
"_e64_dpp")) {
6336 setForcedEncodingSize(64);
6337 return Name.substr(0,
Name.size() - 8);
6338 }
else if (
Name.ends_with(
"_e64")) {
6339 setForcedEncodingSize(64);
6340 return Name.substr(0,
Name.size() - 4);
6341 }
else if (
Name.ends_with(
"_e32")) {
6342 setForcedEncodingSize(32);
6343 return Name.substr(0,
Name.size() - 4);
6344 }
else if (
Name.ends_with(
"_dpp")) {
6346 return Name.substr(0,
Name.size() - 4);
6347 }
else if (
Name.ends_with(
"_sdwa")) {
6348 setForcedSDWA(
true);
6349 return Name.substr(0,
Name.size() - 5);
6356 unsigned VariantID);
6368 Operands.push_back(AMDGPUOperand::CreateToken(
this,
Name, NameLoc));
6370 bool IsMIMG =
Name.starts_with(
"image_");
6373 OperandMode Mode = OperandMode_Default;
6375 Mode = OperandMode_NSA;
6379 checkUnsupportedInstruction(
Name, NameLoc);
6380 if (!Parser.hasPendingError()) {
6383 :
"not a valid operand.";
6384 Error(getLoc(), Msg);
6406 if (!trySkipId(
Name))
6409 Operands.push_back(AMDGPUOperand::CreateToken(
this,
Name, S));
6413ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
const char *Prefix,
6424 std::function<
bool(int64_t &)> ConvertResult) {
6432 if (ConvertResult && !ConvertResult(
Value)) {
6436 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Value, S, ImmTy));
6440ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6442 bool (*ConvertResult)(int64_t &)) {
6451 const unsigned MaxSize = 4;
6455 for (
int I = 0; ; ++
I) {
6457 SMLoc Loc = getLoc();
6461 if (
Op != 0 &&
Op != 1)
6469 if (
I + 1 == MaxSize)
6470 return Error(getLoc(),
"expected a closing square bracket");
6476 Operands.push_back(AMDGPUOperand::CreateImm(
this, Val, S, ImmTy));
6482 AMDGPUOperand::ImmTy ImmTy) {
6486 if (trySkipId(
Name)) {
6488 }
else if (trySkipId(
"no",
Name)) {
6495 return Error(S,
"r128 modifier is not supported on this GPU");
6497 return Error(S,
"a16 modifier is not supported on this GPU");
6499 if (
isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6500 ImmTy = AMDGPUOperand::ImmTyR128A16;
6502 Operands.push_back(AMDGPUOperand::CreateImm(
this, Bit, S, ImmTy));
6507 bool &Disabling)
const {
6508 Disabling =
Id.consume_front(
"no");
6528 SMLoc StringLoc = getLoc();
6530 int64_t CPolVal = 0;
6548 ResScope = parseScope(
Operands, Scope);
6563 Operands.push_back(AMDGPUOperand::CreateImm(
this, CPolVal, StringLoc,
6564 AMDGPUOperand::ImmTyCPol));
6569 SMLoc OpLoc = getLoc();
6570 unsigned Enabled = 0, Seen = 0;
6574 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6581 return Error(S,
"dlc modifier is not supported on this GPU");
6584 return Error(S,
"scc modifier is not supported on this GPU");
6587 return Error(S,
"duplicate cache policy modifier");
6599 AMDGPUOperand::CreateImm(
this,
Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6611 Res = parseStringWithPrefix(
"scope",
Value, StringLoc);
6622 if (Scope == 0xffffffff)
6623 return Error(StringLoc,
"invalid scope value");
6637 if (
Value ==
"TH_DEFAULT")
6639 else if (
Value ==
"TH_STORE_LU" ||
Value ==
"TH_LOAD_RT_WB" ||
6640 Value ==
"TH_LOAD_NT_WB") {
6641 return Error(StringLoc,
"invalid th value");
6642 }
else if (
Value.consume_front(
"TH_ATOMIC_")) {
6644 }
else if (
Value.consume_front(
"TH_LOAD_")) {
6646 }
else if (
Value.consume_front(
"TH_STORE_")) {
6649 return Error(StringLoc,
"invalid th value");
6652 if (
Value ==
"BYPASS")
6683 if (TH == 0xffffffff)
6684 return Error(StringLoc,
"invalid th value");
6691 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6692 AMDGPUOperand::ImmTy ImmT,
6694 auto i = OptionalIdx.find(ImmT);
6695 if (i != OptionalIdx.end()) {
6696 unsigned Idx = i->second;
6697 ((AMDGPUOperand &)*
Operands[
Idx]).addImmOperands(Inst, 1);
6709 StringLoc = getLoc();
6718bool AMDGPUAsmParser::tryParseFmt(
const char *Pref,
6722 SMLoc Loc = getLoc();
6724 auto Res = parseIntWithPrefix(Pref, Val);
6730 if (Val < 0 || Val > MaxVal) {
6740 AMDGPUOperand::ImmTy ImmTy) {
6741 const char *Pref =
"index_key";
6743 SMLoc Loc = getLoc();
6744 auto Res = parseIntWithPrefix(Pref, ImmVal);
6748 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6751 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6754 Operands.push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, ImmTy));
6759 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6763 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6768ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6775 for (
int I = 0;
I < 2; ++
I) {
6776 if (Dfmt == DFMT_UNDEF && !tryParseFmt(
"dfmt", DFMT_MAX, Dfmt))
6779 if (Nfmt == NFMT_UNDEF && !tryParseFmt(
"nfmt", NFMT_MAX, Nfmt))
6784 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6790 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6793 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6794 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6800ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6805 if (!tryParseFmt(
"format", UFMT_MAX, Fmt))
6808 if (Fmt == UFMT_UNDEF)
6815bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6823 if (Format != DFMT_UNDEF) {
6829 if (Format != NFMT_UNDEF) {
6834 Error(Loc,
"unsupported format");
6845 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6850 SMLoc Loc = getLoc();
6851 if (!parseId(Str,
"expected a format string") ||
6852 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6854 if (Dfmt == DFMT_UNDEF)
6855 return Error(Loc,
"duplicate numeric format");
6856 if (Nfmt == NFMT_UNDEF)
6857 return Error(Loc,
"duplicate data format");
6860 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6861 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6865 if (Ufmt == UFMT_UNDEF)
6866 return Error(FormatLoc,
"unsupported format");
6881 if (Id == UFMT_UNDEF)
6885 return Error(Loc,
"unified format is not supported on this GPU");
6891ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6893 SMLoc Loc = getLoc();
6895 if (!parseExpr(Format))
6898 return Error(Loc,
"out of range format");
6903ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6911 SMLoc Loc = getLoc();
6912 if (!parseId(FormatStr,
"expected a format string"))
6915 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6917 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6927 return parseNumericFormat(Format);
6935 SMLoc Loc = getLoc();
6945 AMDGPUOperand::CreateImm(
this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6964 Res = parseSymbolicOrNumericFormat(Format);
6969 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands[
Size - 2]);
6970 assert(
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6977 return Error(getLoc(),
"duplicate format");
6983 parseIntWithPrefix(
"offset",
Operands, AMDGPUOperand::ImmTyOffset);
6985 Res = parseIntWithPrefix(
"inst_offset",
Operands,
6986 AMDGPUOperand::ImmTyInstOffset);
6993 parseNamedBit(
"r128",
Operands, AMDGPUOperand::ImmTyR128A16);
6995 Res = parseNamedBit(
"a16",
Operands, AMDGPUOperand::ImmTyA16);
7001 parseIntWithPrefix(
"blgp",
Operands, AMDGPUOperand::ImmTyBLGP);
7004 parseOperandArrayWithPrefix(
"neg",
Operands, AMDGPUOperand::ImmTyBLGP);
7014 OptionalImmIndexMap OptionalIdx;
7016 unsigned OperandIdx[4];
7017 unsigned EnMask = 0;
7020 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
7021 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
7026 OperandIdx[SrcIdx] = Inst.
size();
7027 Op.addRegOperands(Inst, 1);
7034 OperandIdx[SrcIdx] = Inst.
size();
7040 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7041 Op.addImmOperands(Inst, 1);
7045 if (
Op.isToken() && (
Op.getToken() ==
"done" ||
Op.getToken() ==
"row_en"))
7049 OptionalIdx[
Op.getImmTy()] = i;
7055 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7062 for (
auto i = 0; i < SrcIdx; ++i) {
7064 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7089 IntVal =
encode(ISA, IntVal, CntVal);
7090 if (CntVal !=
decode(ISA, IntVal)) {
7092 IntVal =
encode(ISA, IntVal, -1);
7100bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7102 SMLoc CntLoc = getLoc();
7110 SMLoc ValLoc = getLoc();
7111 if (!parseExpr(CntVal))
7119 if (CntName ==
"vmcnt" || CntName ==
"vmcnt_sat") {
7121 }
else if (CntName ==
"expcnt" || CntName ==
"expcnt_sat") {
7123 }
else if (CntName ==
"lgkmcnt" || CntName ==
"lgkmcnt_sat") {
7126 Error(CntLoc,
"invalid counter name " + CntName);
7131 Error(ValLoc,
"too large value for " + CntName);
7140 Error(getLoc(),
"expected a counter name");
7167bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7168 SMLoc FieldLoc = getLoc();
7174 SMLoc ValueLoc = getLoc();
7181 if (FieldName ==
"instid0") {
7183 }
else if (FieldName ==
"instskip") {
7185 }
else if (FieldName ==
"instid1") {
7188 Error(FieldLoc,
"invalid field name " + FieldName);
7207 .
Case(
"VALU_DEP_1", 1)
7208 .
Case(
"VALU_DEP_2", 2)
7209 .
Case(
"VALU_DEP_3", 3)
7210 .
Case(
"VALU_DEP_4", 4)
7211 .
Case(
"TRANS32_DEP_1", 5)
7212 .
Case(
"TRANS32_DEP_2", 6)
7213 .
Case(
"TRANS32_DEP_3", 7)
7214 .
Case(
"FMA_ACCUM_CYCLE_1", 8)
7215 .
Case(
"SALU_CYCLE_1", 9)
7216 .
Case(
"SALU_CYCLE_2", 10)
7217 .
Case(
"SALU_CYCLE_3", 11)
7225 Delay |=
Value << Shift;
7235 if (!parseDelay(Delay))
7239 if (!parseExpr(Delay))
7243 Operands.push_back(AMDGPUOperand::CreateImm(
this, Delay, S));
7248AMDGPUOperand::isSWaitCnt()
const {
7252bool AMDGPUOperand::isSDelayALU()
const {
return isImm(); }
7258void AMDGPUAsmParser::depCtrError(
SMLoc Loc,
int ErrorId,
7262 Error(Loc,
Twine(
"invalid counter name ", DepCtrName));
7265 Error(Loc,
Twine(DepCtrName,
" is not supported on this GPU"));
7268 Error(Loc,
Twine(
"duplicate counter name ", DepCtrName));
7271 Error(Loc,
Twine(
"invalid value for ", DepCtrName));
7278bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr,
unsigned &UsedOprMask) {
7282 SMLoc DepCtrLoc = getLoc();
7290 if (!parseExpr(ExprVal))
7293 unsigned PrevOprMask = UsedOprMask;
7294 int CntVal =
encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7297 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7306 Error(getLoc(),
"expected a counter name");
7311 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7312 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7320 SMLoc Loc = getLoc();
7323 unsigned UsedOprMask = 0;
7325 if (!parseDepCtr(DepCtr, UsedOprMask))
7329 if (!parseExpr(DepCtr))
7333 Operands.push_back(AMDGPUOperand::CreateImm(
this, DepCtr, Loc));
7337bool AMDGPUOperand::isDepCtr()
const {
return isS16Imm(); }
7343ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7345 OperandInfoTy &Width) {
7352 HwReg.Loc = getLoc();
7355 HwReg.IsSymbolic =
true;
7357 }
else if (!parseExpr(HwReg.Val,
"a register name")) {
7365 if (!skipToken(
AsmToken::Comma,
"expected a comma or a closing parenthesis"))
7369 if (!parseExpr(
Offset.Val))
7375 Width.Loc = getLoc();
7376 if (!parseExpr(Width.Val) ||
7387 SMLoc Loc = getLoc();
7389 StructuredOpField HwReg(
"id",
"hardware register", HwregId::Width,
7391 StructuredOpField
Offset(
"offset",
"bit offset", HwregOffset::Width,
7392 HwregOffset::Default);
7393 struct : StructuredOpField {
7394 using StructuredOpField::StructuredOpField;
7395 bool validate(AMDGPUAsmParser &Parser)
const override {
7397 return Error(Parser,
"only values from 1 to 32 are legal");
7400 } Width(
"size",
"bitfield width", HwregSize::Width, HwregSize::Default);
7404 Res = parseHwregFunc(HwReg,
Offset, Width);
7407 if (!validateStructuredOpFields({&HwReg, &
Offset, &Width}))
7409 ImmVal = HwregEncoding::encode(HwReg.Val,
Offset.Val, Width.Val);
7413 parseExpr(ImmVal,
"a hwreg macro, structured immediate"))
7419 if (!isUInt<16>(ImmVal))
7420 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
7422 AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7426bool AMDGPUOperand::isHwreg()
const {
7427 return isImmTy(ImmTyHwreg);
7435AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7437 OperandInfoTy &Stream) {
7443 Msg.IsSymbolic =
true;
7445 }
else if (!parseExpr(
Msg.Val,
"a message name")) {
7450 Op.IsDefined =
true;
7455 }
else if (!parseExpr(
Op.Val,
"an operation name")) {
7460 Stream.IsDefined =
true;
7461 Stream.Loc = getLoc();
7462 if (!parseExpr(Stream.Val))
7471AMDGPUAsmParser::validateSendMsg(
const OperandInfoTy &Msg,
7472 const OperandInfoTy &
Op,
7473 const OperandInfoTy &Stream) {
7483 Error(
Msg.Loc,
"specified message id is not supported on this GPU");
7488 Error(
Msg.Loc,
"invalid message id");
7494 Error(
Op.Loc,
"message does not support operations");
7496 Error(
Msg.Loc,
"missing message operation");
7501 Error(
Op.Loc,
"invalid operation id");
7506 Error(Stream.Loc,
"message operation does not support streams");
7510 Error(Stream.Loc,
"invalid message stream id");
7520 SMLoc Loc = getLoc();
7524 OperandInfoTy
Op(OP_NONE_);
7525 OperandInfoTy Stream(STREAM_ID_NONE_);
7526 if (parseSendMsgBody(Msg,
Op, Stream) &&
7527 validateSendMsg(Msg,
Op, Stream)) {
7532 }
else if (parseExpr(ImmVal,
"a sendmsg macro")) {
7533 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7534 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
7539 Operands.push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7543bool AMDGPUOperand::isSendMsg()
const {
7544 return isImmTy(ImmTySendMsg);
7565 return Error(S,
"invalid interpolation slot");
7567 Operands.push_back(AMDGPUOperand::CreateImm(
this, Slot, S,
7568 AMDGPUOperand::ImmTyInterpSlot));
7579 if (!Str.starts_with(
"attr"))
7580 return Error(S,
"invalid interpolation attribute");
7590 return Error(S,
"invalid or missing interpolation attribute channel");
7592 Str = Str.drop_back(2).drop_front(4);
7595 if (Str.getAsInteger(10, Attr))
7596 return Error(S,
"invalid or missing interpolation attribute number");
7599 return Error(S,
"out of bounds interpolation attribute number");
7603 Operands.push_back(AMDGPUOperand::CreateImm(
this, Attr, S,
7604 AMDGPUOperand::ImmTyInterpAttr));
7605 Operands.push_back(AMDGPUOperand::CreateImm(
7606 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7625 return Error(S, (
Id == ET_INVALID)
7626 ?
"invalid exp target"
7627 :
"exp target is not supported on this GPU");
7629 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Id, S,
7630 AMDGPUOperand::ImmTyExpTgt));
7645 return isId(getToken(),
Id);
7650 return getTokenKind() ==
Kind;
7653StringRef AMDGPUAsmParser::getId()
const {
7680 if (isId(
Id) && peekToken().is(Kind)) {
7690 if (isToken(Kind)) {
7700 if (!trySkipToken(Kind)) {
7701 Error(getLoc(), ErrMsg);
7712 if (Parser.parseExpression(Expr))
7715 if (Expr->evaluateAsAbsolute(Imm))
7719 Error(S,
"expected absolute expression");
7722 Twine(
" or an absolute expression"));
7732 if (Parser.parseExpression(Expr))
7736 if (Expr->evaluateAsAbsolute(IntVal)) {
7737 Operands.push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
7739 Operands.push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
7747 Val = getToken().getStringContents();
7751 Error(getLoc(), ErrMsg);
7759 Val = getTokenStr();
7763 if (!ErrMsg.
empty())
7764 Error(getLoc(), ErrMsg);
7770AMDGPUAsmParser::getToken()
const {
7771 return Parser.getTok();
7774AsmToken AMDGPUAsmParser::peekToken(
bool ShouldSkipSpace) {
7777 : getLexer().peekTok(ShouldSkipSpace);
7782 auto TokCount = getLexer().peekTokens(Tokens);
7789AMDGPUAsmParser::getTokenKind()
const {
7794AMDGPUAsmParser::getLoc()
const {
7795 return getToken().getLoc();
7799AMDGPUAsmParser::getTokenStr()
const {
7800 return getToken().getString();
7804AMDGPUAsmParser::lex() {
7809 return ((AMDGPUOperand &)*
Operands[0]).getStartLoc();
7813AMDGPUAsmParser::getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
7815 for (
unsigned i =
Operands.size() - 1; i > 0; --i) {
7816 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
7818 return Op.getStartLoc();
7824AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy
Type,
7826 auto Test = [=](
const AMDGPUOperand&
Op) {
return Op.isImmTy(
Type); };
7831AMDGPUAsmParser::getRegLoc(
unsigned Reg,
7833 auto Test = [=](
const AMDGPUOperand&
Op) {
7834 return Op.isRegKind() &&
Op.getReg() ==
Reg;
7840 bool SearchMandatoryLiterals)
const {
7841 auto Test = [](
const AMDGPUOperand&
Op) {
7842 return Op.IsImmKindLiteral() ||
Op.isExpr();
7845 if (SearchMandatoryLiterals && Loc == getInstLoc(
Operands))
7846 Loc = getMandatoryLitLoc(
Operands);
7851 auto Test = [](
const AMDGPUOperand &
Op) {
7852 return Op.IsImmKindMandatoryLiteral();
7859 auto Test = [](
const AMDGPUOperand&
Op) {
7860 return Op.isImmKindConst();
7877 SMLoc IdLoc = getLoc();
7883 find_if(Fields, [
Id](StructuredOpField *
F) {
return F->Id ==
Id; });
7884 if (
I == Fields.
end())
7885 return Error(IdLoc,
"unknown field");
7886 if ((*I)->IsDefined)
7887 return Error(IdLoc,
"duplicate field");
7890 (*I)->Loc = getLoc();
7891 if (!parseExpr((*I)->Val))
7893 (*I)->IsDefined =
true;
7900bool AMDGPUAsmParser::validateStructuredOpFields(
7902 return all_of(Fields, [
this](
const StructuredOpField *
F) {
7903 return F->validate(*
this);
7914 const unsigned OrMask,
7915 const unsigned XorMask) {
7918 return BITMASK_PERM_ENC |
7919 (AndMask << BITMASK_AND_SHIFT) |
7920 (OrMask << BITMASK_OR_SHIFT) |
7921 (XorMask << BITMASK_XOR_SHIFT);
7925AMDGPUAsmParser::parseSwizzleOperand(int64_t &
Op,
7926 const unsigned MinVal,
7927 const unsigned MaxVal,
7934 if (!parseExpr(
Op)) {
7937 if (Op < MinVal || Op > MaxVal) {
7946AMDGPUAsmParser::parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
7947 const unsigned MinVal,
7948 const unsigned MaxVal,
7951 for (
unsigned i = 0; i < OpNum; ++i) {
7952 if (!parseSwizzleOperand(
Op[i], MinVal, MaxVal, ErrMsg, Loc))
7960AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7964 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7965 "expected a 2-bit lane id")) {
7976AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7983 if (!parseSwizzleOperand(GroupSize,
7985 "group size must be in the interval [2,32]",
7990 Error(Loc,
"group size must be a power of two");
7993 if (parseSwizzleOperand(LaneIdx,
7995 "lane id must be in the interval [0,group size - 1]",
8004AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8010 if (!parseSwizzleOperand(GroupSize,
8012 "group size must be in the interval [2,32]",
8017 Error(Loc,
"group size must be a power of two");
8026AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8032 if (!parseSwizzleOperand(GroupSize,
8034 "group size must be in the interval [1,16]",
8039 Error(Loc,
"group size must be a power of two");
8048AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8056 SMLoc StrLoc = getLoc();
8057 if (!parseString(Ctl)) {
8060 if (Ctl.
size() != BITMASK_WIDTH) {
8061 Error(StrLoc,
"expected a 5-character mask");
8065 unsigned AndMask = 0;
8066 unsigned OrMask = 0;
8067 unsigned XorMask = 0;
8069 for (
size_t i = 0; i < Ctl.
size(); ++i) {
8073 Error(StrLoc,
"invalid mask");
8095AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8097 SMLoc OffsetLoc = getLoc();
8099 if (!parseExpr(Imm,
"a swizzle macro")) {
8102 if (!isUInt<16>(Imm)) {
8103 Error(OffsetLoc,
"expected a 16-bit offset");
8110AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8115 SMLoc ModeLoc = getLoc();
8118 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8119 Ok = parseSwizzleQuadPerm(Imm);
8120 }
else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8121 Ok = parseSwizzleBitmaskPerm(Imm);
8122 }
else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8123 Ok = parseSwizzleBroadcast(Imm);
8124 }
else if (trySkipId(IdSymbolic[ID_SWAP])) {
8125 Ok = parseSwizzleSwap(Imm);
8126 }
else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8127 Ok = parseSwizzleReverse(Imm);
8129 Error(ModeLoc,
"expected a swizzle mode");
8132 return Ok && skipToken(
AsmToken::RParen,
"expected a closing parentheses");
8142 if (trySkipId(
"offset")) {
8146 if (trySkipId(
"swizzle")) {
8147 Ok = parseSwizzleMacro(Imm);
8149 Ok = parseSwizzleOffset(Imm);
8153 Operands.push_back(AMDGPUOperand::CreateImm(
this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8161AMDGPUOperand::isSwizzle()
const {
8162 return isImmTy(ImmTySwizzle);
8169int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8183 for (
unsigned ModeId = ID_MIN; ModeId <=
ID_MAX; ++ModeId) {
8184 if (trySkipId(IdSymbolic[ModeId])) {
8191 Error(S, (Imm == 0)?
8192 "expected a VGPR index mode or a closing parenthesis" :
8193 "expected a VGPR index mode");
8198 Error(S,
"duplicate VGPR index mode");
8206 "expected a comma or a closing parenthesis"))
8221 Imm = parseGPRIdxMacro();
8225 if (getParser().parseAbsoluteExpression(Imm))
8227 if (Imm < 0 || !isUInt<4>(Imm))
8228 return Error(S,
"invalid immediate: only 4-bit values are legal");
8232 AMDGPUOperand::CreateImm(
this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8236bool AMDGPUOperand::isGPRIdxMode()
const {
8237 return isImmTy(ImmTyGprIdxMode);
8249 if (isRegister() || isModifier())
8261 if (
Opr.isExpr() && !
Opr.isSymbolRefExpr()) {
8262 Error(Loc,
"expected an absolute expression or a label");
8263 }
else if (
Opr.isImm() && !
Opr.isS16Imm()) {
8264 Error(Loc,
"expected a 16-bit signed jump offset");
8282void AMDGPUAsmParser::cvtMubufImpl(
MCInst &Inst,
8285 OptionalImmIndexMap OptionalIdx;
8286 unsigned FirstOperandIdx = 1;
8287 bool IsAtomicReturn =
false;
8294 for (
unsigned i = FirstOperandIdx, e =
Operands.size(); i != e; ++i) {
8295 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
8299 Op.addRegOperands(Inst, 1);
8303 if (IsAtomicReturn && i == FirstOperandIdx)
8304 Op.addRegOperands(Inst, 1);
8309 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8310 Op.addImmOperands(Inst, 1);
8322 OptionalIdx[
Op.getImmTy()] = i;
8333bool AMDGPUOperand::isSMRDOffset8()
const {
8334 return isImmLiteral() && isUInt<8>(getImm());
8337bool AMDGPUOperand::isSMEMOffset()
const {
8339 return isImmLiteral();
8342bool AMDGPUOperand::isSMRDLiteralOffset()
const {
8345 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8377bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8378 if (BoundCtrl == 0 || BoundCtrl == 1) {
8386void AMDGPUAsmParser::onBeginOfFile() {
8387 if (!getParser().getStreamer().getTargetStreamer() ||
8391 if (!getTargetStreamer().getTargetID())
8392 getTargetStreamer().initializeTargetID(getSTI(),
8393 getSTI().getFeatureString());
8396 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8404bool AMDGPUAsmParser::parsePrimaryExpr(
const MCExpr *&Res,
SMLoc &EndLoc) {
8410 .
Case(
"max", AGVK::AGVK_Max)
8411 .
Case(
"or", AGVK::AGVK_Or)
8421 if (Exprs.
empty()) {
8422 Error(getToken().getLoc(),
8423 "empty " +
Twine(TokenId) +
" expression");
8426 if (CommaCount + 1 != Exprs.
size()) {
8427 Error(getToken().getLoc(),
8428 "mismatch of commas in " +
Twine(TokenId) +
" expression");
8435 if (getParser().parseExpression(Expr, EndLoc))
8439 if (LastTokenWasComma)
8442 Error(getToken().getLoc(),
8443 "unexpected token in " +
Twine(TokenId) +
" expression");
8449 return getParser().parsePrimaryExpr(Res, EndLoc,
nullptr);
8454 if (
Name ==
"mul") {
8455 return parseIntWithPrefix(
"mul",
Operands,
8459 if (
Name ==
"div") {
8460 return parseIntWithPrefix(
"div",
Operands,
8476 const int Ops[] = { AMDGPU::OpName::src0,
8477 AMDGPU::OpName::src1,
8478 AMDGPU::OpName::src2 };
8493 if (
DstOp.isReg() &&
8494 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(
DstOp.
getReg())) {
8498 if ((OpSel & (1 << SrcNum)) != 0)
8504void AMDGPUAsmParser::cvtVOP3OpSel(
MCInst &Inst,
8511 OptionalImmIndexMap &OptionalIdx) {
8512 cvtVOP3P(Inst,
Operands, OptionalIdx);
8521 &&
Desc.NumOperands > (OpNum + 1)
8523 &&
Desc.operands()[OpNum + 1].RegClass != -1
8525 &&
Desc.getOperandConstraint(OpNum + 1,
8526 MCOI::OperandConstraint::TIED_TO) == -1;
8531 OptionalImmIndexMap OptionalIdx;
8536 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
8537 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
8540 for (
unsigned E =
Operands.size();
I != E; ++
I) {
8541 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
8543 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8544 }
else if (
Op.isInterpSlot() ||
Op.isInterpAttr() ||
8545 Op.isInterpAttrChan()) {
8547 }
else if (
Op.isImmModifier()) {
8548 OptionalIdx[
Op.getImmTy()] =
I;
8556 AMDGPUOperand::ImmTyHigh);
8560 AMDGPUOperand::ImmTyClampSI);
8564 AMDGPUOperand::ImmTyOModSI);
8569 OptionalImmIndexMap OptionalIdx;
8574 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
8575 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
8578 for (
unsigned E =
Operands.size();
I != E; ++
I) {
8579 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
8581 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8582 }
else if (
Op.isImmModifier()) {
8583 OptionalIdx[
Op.getImmTy()] =
I;
8600 const int Ops[] = { AMDGPU::OpName::src0,
8601 AMDGPU::OpName::src1,
8602 AMDGPU::OpName::src2 };
8603 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8604 AMDGPU::OpName::src1_modifiers,
8605 AMDGPU::OpName::src2_modifiers };
8609 for (
int J = 0; J < 3; ++J) {
8617 if ((OpSel & (1 << J)) != 0)
8619 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8620 (OpSel & (1 << 3)) != 0)
8628 OptionalImmIndexMap &OptionalIdx) {
8633 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
8634 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
8637 for (
unsigned E =
Operands.size();
I != E; ++
I) {
8638 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
8640 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8641 }
else if (
Op.isImmModifier()) {
8642 OptionalIdx[
Op.getImmTy()] =
I;
8643 }
else if (
Op.isRegOrImm()) {
8644 Op.addRegOrImmOperands(Inst, 1);
8654 AMDGPUOperand::ImmTyByteSel);
8659 AMDGPUOperand::ImmTyClampSI);
8663 AMDGPUOperand::ImmTyOModSI);
8670 auto it = Inst.
begin();
8680 OptionalImmIndexMap OptionalIdx;
8681 cvtVOP3(Inst,
Operands, OptionalIdx);
8685 OptionalImmIndexMap &OptIdx) {
8691 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8692 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8693 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8694 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8702 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8703 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8704 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8705 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8706 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8707 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8708 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8709 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8718 if (OpSelIdx != -1) {
8723 if (OpSelHiIdx != -1) {
8737 const int Ops[] = { AMDGPU::OpName::src0,
8738 AMDGPU::OpName::src1,
8739 AMDGPU::OpName::src2 };
8740 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8741 AMDGPU::OpName::src1_modifiers,
8742 AMDGPU::OpName::src2_modifiers };
8745 unsigned OpSelHi = 0;
8752 if (OpSelHiIdx != -1)
8761 for (
int J = 0; J < 3; ++J) {
8774 if (
SrcOp.isReg() && getMRI()
8781 if ((OpSel & (1 << J)) != 0)
8785 if ((OpSelHi & (1 << J)) != 0)
8788 if ((NegLo & (1 << J)) != 0)
8791 if ((NegHi & (1 << J)) != 0)
8799 OptionalImmIndexMap OptIdx;
8805 unsigned i,
unsigned Opc,
unsigned OpName) {
8807 ((AMDGPUOperand &)*
Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8809 ((AMDGPUOperand &)*
Operands[i]).addRegOperands(Inst, 1);
8815 ((AMDGPUOperand &)*
Operands[1]).addRegOperands(Inst, 1);
8818 ((AMDGPUOperand &)*
Operands[1]).addRegOperands(Inst, 1);
8819 ((AMDGPUOperand &)*
Operands[4]).addRegOperands(Inst, 1);
8821 OptionalImmIndexMap OptIdx;
8822 for (
unsigned i = 5; i <
Operands.size(); ++i) {
8823 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
8824 OptIdx[
Op.getImmTy()] = i;
8829 AMDGPUOperand::ImmTyIndexKey8bit);
8833 AMDGPUOperand::ImmTyIndexKey16bit);
8853 Operands.push_back(AMDGPUOperand::CreateToken(
this,
"::", S));
8854 SMLoc OpYLoc = getLoc();
8857 Operands.push_back(AMDGPUOperand::CreateToken(
this, OpYName, OpYLoc));
8860 return Error(OpYLoc,
"expected a VOPDY instruction after ::");
8867 auto addOp = [&](
uint16_t ParsedOprIdx) {
8868 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[ParsedOprIdx]);
8870 Op.addRegOperands(Inst, 1);
8874 Op.addImmOperands(Inst, 1);
8886 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8890 const auto &CInfo = InstInfo[CompIdx];
8891 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8892 for (
unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8893 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8894 if (CInfo.hasSrc2Acc())
8895 addOp(CInfo.getIndexOfDstInParsedOperands());
8903bool AMDGPUOperand::isDPP8()
const {
8904 return isImmTy(ImmTyDPP8);
8907bool AMDGPUOperand::isDPPCtrl()
const {
8908 using namespace AMDGPU::DPP;
8910 bool result =
isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8912 int64_t
Imm = getImm();
8913 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8914 (
Imm >= DppCtrl::ROW_SHL_FIRST &&
Imm <= DppCtrl::ROW_SHL_LAST) ||
8915 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8916 (
Imm >= DppCtrl::ROW_ROR_FIRST &&
Imm <= DppCtrl::ROW_ROR_LAST) ||
8917 (Imm == DppCtrl::WAVE_SHL1) ||
8918 (
Imm == DppCtrl::WAVE_ROL1) ||
8919 (Imm == DppCtrl::WAVE_SHR1) ||
8920 (
Imm == DppCtrl::WAVE_ROR1) ||
8921 (Imm == DppCtrl::ROW_MIRROR) ||
8922 (
Imm == DppCtrl::ROW_HALF_MIRROR) ||
8923 (Imm == DppCtrl::BCAST15) ||
8924 (
Imm == DppCtrl::BCAST31) ||
8925 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8926 (
Imm >= DppCtrl::ROW_XMASK_FIRST &&
Imm <= DppCtrl::ROW_XMASK_LAST);
8935bool AMDGPUOperand::isBLGP()
const {
8936 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8939bool AMDGPUOperand::isCBSZ()
const {
8940 return isImm() && getImmTy() == ImmTyCBSZ;
8943bool AMDGPUOperand::isABID()
const {
8944 return isImm() && getImmTy() == ImmTyABID;
8947bool AMDGPUOperand::isS16Imm()
const {
8948 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8951bool AMDGPUOperand::isU16Imm()
const {
8952 return isImmLiteral() && isUInt<16>(getImm());
8959bool AMDGPUAsmParser::parseDimId(
unsigned &Encoding) {
8964 SMLoc Loc = getToken().getEndLoc();
8965 Token = std::string(getTokenStr());
8967 if (getLoc() != Loc)
8972 if (!parseId(Suffix))
8998 SMLoc Loc = getLoc();
8999 if (!parseDimId(Encoding))
9000 return Error(Loc,
"invalid dim value");
9002 Operands.push_back(AMDGPUOperand::CreateImm(
this, Encoding, S,
9003 AMDGPUOperand::ImmTyDim));
9021 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
9024 for (
size_t i = 0; i < 8; ++i) {
9028 SMLoc Loc = getLoc();
9029 if (getParser().parseAbsoluteExpression(Sels[i]))
9031 if (0 > Sels[i] || 7 < Sels[i])
9032 return Error(Loc,
"expected a 3-bit value");
9039 for (
size_t i = 0; i < 8; ++i)
9040 DPP8 |= (Sels[i] << (i * 3));
9042 Operands.push_back(AMDGPUOperand::CreateImm(
this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9047AMDGPUAsmParser::isSupportedDPPCtrl(
StringRef Ctrl,
9049 if (Ctrl ==
"row_newbcast")
9052 if (Ctrl ==
"row_share" ||
9053 Ctrl ==
"row_xmask")
9056 if (Ctrl ==
"wave_shl" ||
9057 Ctrl ==
"wave_shr" ||
9058 Ctrl ==
"wave_rol" ||
9059 Ctrl ==
"wave_ror" ||
9060 Ctrl ==
"row_bcast")
9063 return Ctrl ==
"row_mirror" ||
9064 Ctrl ==
"row_half_mirror" ||
9065 Ctrl ==
"quad_perm" ||
9066 Ctrl ==
"row_shl" ||
9067 Ctrl ==
"row_shr" ||
9072AMDGPUAsmParser::parseDPPCtrlPerm() {
9075 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
9079 for (
int i = 0; i < 4; ++i) {
9084 SMLoc Loc = getLoc();
9085 if (getParser().parseAbsoluteExpression(Temp))
9087 if (Temp < 0 || Temp > 3) {
9088 Error(Loc,
"expected a 2-bit value");
9092 Val += (Temp << i * 2);
9102AMDGPUAsmParser::parseDPPCtrlSel(
StringRef Ctrl) {
9103 using namespace AMDGPU::DPP;
9108 SMLoc Loc = getLoc();
9110 if (getParser().parseAbsoluteExpression(Val))
9113 struct DppCtrlCheck {
9120 .
Case(
"wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9121 .Case(
"wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9122 .Case(
"wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9123 .Case(
"wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9124 .Case(
"row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9125 .Case(
"row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9126 .Case(
"row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9127 .Case(
"row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9128 .Case(
"row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9129 .Case(
"row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9133 if (
Check.Ctrl == -1) {
9134 Valid = (
Ctrl ==
"row_bcast" && (Val == 15 || Val == 31));
9135 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9150 using namespace AMDGPU::DPP;
9153 !isSupportedDPPCtrl(getTokenStr(),
Operands))
9162 if (Ctrl ==
"row_mirror") {
9163 Val = DppCtrl::ROW_MIRROR;
9164 }
else if (Ctrl ==
"row_half_mirror") {
9165 Val = DppCtrl::ROW_HALF_MIRROR;
9168 if (Ctrl ==
"quad_perm") {
9169 Val = parseDPPCtrlPerm();
9171 Val = parseDPPCtrlSel(Ctrl);
9180 AMDGPUOperand::CreateImm(
this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9186 OptionalImmIndexMap OptionalIdx;
9196 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9200 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9201 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9205 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9209 if (OldIdx == NumOperands) {
9211 constexpr int DST_IDX = 0;
9213 }
else if (Src2ModIdx == NumOperands) {
9224 bool IsVOP3CvtSrDpp =
9225 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9226 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9227 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9228 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9229 if (IsVOP3CvtSrDpp) {
9243 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9245 if (IsDPP8 &&
Op.isDppFI()) {
9248 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9249 }
else if (
Op.isReg()) {
9250 Op.addRegOperands(Inst, 1);
9251 }
else if (
Op.isImm() &&
9253 assert(!
Op.IsImmKindLiteral() &&
"Cannot use literal with DPP");
9254 Op.addImmOperands(Inst, 1);
9255 }
else if (
Op.isImm()) {
9256 OptionalIdx[
Op.getImmTy()] =
I;
9264 AMDGPUOperand::ImmTyByteSel);
9273 cvtVOP3P(Inst,
Operands, OptionalIdx);
9275 cvtVOP3OpSel(Inst,
Operands, OptionalIdx);
9292 AMDGPUOperand::ImmTyDppFI);
9297 OptionalImmIndexMap OptionalIdx;
9301 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9302 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9306 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9314 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9316 if (
Op.isReg() && validateVccOperand(
Op.getReg())) {
9324 Op.addImmOperands(Inst, 1);
9326 Op.addRegWithFPInputModsOperands(Inst, 2);
9327 }
else if (
Op.isDppFI()) {
9329 }
else if (
Op.isReg()) {
9330 Op.addRegOperands(Inst, 1);
9336 Op.addRegWithFPInputModsOperands(Inst, 2);
9337 }
else if (
Op.isReg()) {
9338 Op.addRegOperands(Inst, 1);
9339 }
else if (
Op.isDPPCtrl()) {
9340 Op.addImmOperands(Inst, 1);
9341 }
else if (
Op.isImm()) {
9343 OptionalIdx[
Op.getImmTy()] =
I;
9359 AMDGPUOperand::ImmTyDppFI);
9370 AMDGPUOperand::ImmTy
Type) {
9383 .
Case(
"BYTE_0", SdwaSel::BYTE_0)
9384 .
Case(
"BYTE_1", SdwaSel::BYTE_1)
9385 .
Case(
"BYTE_2", SdwaSel::BYTE_2)
9386 .
Case(
"BYTE_3", SdwaSel::BYTE_3)
9387 .
Case(
"WORD_0", SdwaSel::WORD_0)
9388 .
Case(
"WORD_1", SdwaSel::WORD_1)
9389 .
Case(
"DWORD", SdwaSel::DWORD)
9392 if (
Int == 0xffffffff)
9393 return Error(StringLoc,
"invalid " +
Twine(Prefix) +
" value");
9412 .
Case(
"UNUSED_PAD", DstUnused::UNUSED_PAD)
9413 .
Case(
"UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9414 .
Case(
"UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9417 if (
Int == 0xffffffff)
9418 return Error(StringLoc,
"invalid dst_unused value");
9420 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9450 OptionalImmIndexMap OptionalIdx;
9451 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9452 bool SkippedVcc =
false;
9456 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9457 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9460 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9461 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9462 if (SkipVcc && !SkippedVcc &&
Op.isReg() &&
9463 (
Op.getReg() == AMDGPU::VCC ||
Op.getReg() == AMDGPU::VCC_LO)) {
9481 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9482 }
else if (
Op.isImm()) {
9484 OptionalIdx[
Op.getImmTy()] =
I;
9492 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9493 Opc != AMDGPU::V_NOP_sdwa_vi) {
9495 switch (BasicInstType) {
9499 AMDGPUOperand::ImmTyClampSI, 0);
9503 AMDGPUOperand::ImmTyOModSI, 0);
9507 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9511 AMDGPUOperand::ImmTySDWADstUnused,
9512 DstUnused::UNUSED_PRESERVE);
9537 llvm_unreachable(
"Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9543 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9544 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9545 auto it = Inst.
begin();
9558#define GET_REGISTER_MATCHER
9559#define GET_MATCHER_IMPLEMENTATION
9560#define GET_MNEMONIC_SPELL_CHECKER
9561#define GET_MNEMONIC_CHECKER
9562#include "AMDGPUGenAsmMatcher.inc"
9568 return parseTokenOp(
"addr64",
Operands);
9570 return parseTokenOp(
"done",
Operands);
9572 return parseTokenOp(
"idxen",
Operands);
9574 return parseTokenOp(
"lds",
Operands);
9576 return parseTokenOp(
"offen",
Operands);
9578 return parseTokenOp(
"off",
Operands);
9580 return parseTokenOp(
"row_en",
Operands);
9582 return parseNamedBit(
"gds",
Operands, AMDGPUOperand::ImmTyGDS);
9584 return parseNamedBit(
"tfe",
Operands, AMDGPUOperand::ImmTyTFE);
9586 return tryCustomParseOperand(
Operands, MCK);
9597 AMDGPUOperand &Operand = (AMDGPUOperand&)
Op;
9600 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9602 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9604 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9606 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9608 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9610 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9618 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9620 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9621 case MCK_SOPPBrTarget:
9622 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9623 case MCK_VReg32OrOff:
9624 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9625 case MCK_InterpSlot:
9626 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9627 case MCK_InterpAttr:
9628 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9629 case MCK_InterpAttrChan:
9630 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9632 case MCK_SReg_64_XEXEC:
9638 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9640 return Match_InvalidOperand;
9652 if (!parseExpr(Imm)) {
9657 if (!isUInt<16>(Imm))
9658 return Error(S,
"expected a 16-bit value");
9661 AMDGPUOperand::CreateImm(
this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9665bool AMDGPUOperand::isEndpgm()
const {
return isImmTy(ImmTyEndpgm); }
9671bool AMDGPUOperand::isWaitVDST()
const {
return isImmTy(ImmTyWaitVDST); }
9673bool AMDGPUOperand::isWaitVAVDst()
const {
return isImmTy(ImmTyWaitVAVDst); }
9675bool AMDGPUOperand::isWaitVMVSrc()
const {
return isImmTy(ImmTyWaitVMVSrc); }
9681bool AMDGPUOperand::isWaitEXP()
const {
return isImmTy(ImmTyWaitEXP); }
9687bool AMDGPUOperand::isSplitBarrier()
const {
return isInlinableImm(MVT::i32); }
unsigned const MachineRegisterInfo * MRI
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static unsigned getSpecialRegForName(StringRef RegName)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, unsigned OpName)
static bool IsRevOpcode(const unsigned Opcode)
static int getRegClass(RegisterKind Is, unsigned RegWidth)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, const MCRegisterInfo *MRI)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
#define LLVM_EXTERNAL_VISIBILITY
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static unsigned getOperandSize(MachineInstr &MI, unsigned Idx, MachineRegisterInfo &MRI)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
#define G_00B848_FWD_PROGRESS(x)
#define G_00B848_MEM_ORDERED(x)
#define G_00B848_IEEE_MODE(x)
#define G_00B848_DX10_CLAMP(x)
#define G_00B848_WGP_MODE(x)
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
unsigned unsigned DefaultVal
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
support::ulittle16_t & Lo
support::ulittle16_t & Hi
static const AMDGPUVariadicMCExpr * create(VariadicKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
Target independent representation for an assembler token.
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
TokenKind getKind() const
This class represents an Operation in the Expression.
Base class for user error types.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or a Error.
Class representing an expression and its matching format.
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
MCAsmParser & getParser()
Generic assembler parser interface, for use by target specific assembly parsers.
virtual MCStreamer & getStreamer()=0
Return the output streamer for the assembler.
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCRegisterInfo * getRegisterInfo() const
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
const MCSubtargetInfo * getSubtargetInfo() const
Base class for the full range of assembler expressions which are needed for parsing.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
iterator insert(iterator I, const MCOperand &Op)
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
bool mayStore() const
Return true if this instruction could possibly modify memory.
Interface to description of machine instruction set.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Instances of this class represent operands of the MCInst class.
static MCOperand createReg(unsigned Reg)
static MCOperand createExpr(const MCExpr *Val)
void setReg(unsigned Reg)
Set the register number.
static MCOperand createImm(int64_t Val)
unsigned getReg() const
Returns the register number.
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
virtual bool isReg() const =0
isReg - Is this a register operand?
virtual bool isMem() const =0
isMem - Is this a memory operand?
virtual MCRegister getReg() const =0
virtual bool isToken() const =0
isToken - Is this a token operand?
virtual bool isImm() const =0
isImm - Is this an immediate operand?
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
unsigned getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
static constexpr unsigned NoRegister
Streaming machine code generation interface.
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
MCTargetStreamer * getTargetStreamer()
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
FeatureBitset ToggleFeature(uint64_t FB)
Toggle a feature and return the re-computed feature bits.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
MCTargetAsmParser - Generic interface to target specific assembly parsers.
MCSubtargetInfo & copySTI()
Create a copy of STI and return a non-const reference to it.
virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
virtual bool ParseDirective(AsmToken DirectiveID)
ParseDirective - Parse a target specific assembler directive This method is deprecated,...
virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
tryParseRegister - parse one register if possible
void setAvailableFeatures(const FeatureBitset &Value)
const MCSubtargetInfo & getSTI() const
virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind)
Allow a target to add special case operand matching for things that tblgen doesn't/can't handle effec...
virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands)=0
ParseInstruction - Parse one assembly instruction.
virtual unsigned checkTargetMatchPredicate(MCInst &Inst)
checkTargetMatchPredicate - Validate the instruction match against any complex target predicates not ...
virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm)=0
MatchAndEmitInstruction - Recognize a series of operands of a parsed instruction as an actual MCInst ...
Target specific streamer interface.
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
Wrapper class representing virtual and physical registers.
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
constexpr const char * getPointer() const
constexpr bool isValid() const
Represents a range in source code.
Implements a dense probed hash-table based set with some number of buckets stored inline.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringMapEntry - This is used to represent one value that is inserted into a StringMap.
StringRef - Represent a constant reference to a string, i.e.
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringSet - A wrapper for StringMap that provides set-like functionality.
bool contains(StringRef key) const
Check if the set contains the given key.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
const CustomOperand< const MCSubtargetInfo & > Opr[]
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
int64_t getMsgOpId(int64_t MsgId, const StringRef Name)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
const CustomOperand< const MCSubtargetInfo & > Msg[]
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned COMPONENTS[]
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool isGFX9(const MCSubtargetInfo &STI)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isVI(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ UNDEF
UNDEF - An undefined node.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Reg
All possible values of the reg field in the ModR/M byte.
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
void PrintError(const Twine &Msg)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Target & getTheR600Target()
The target for R600 GPUs.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Target & getTheGCNTarget()
The target for GCN GPUs.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool parseAmdKernelCodeField(StringRef ID, MCAsmParser &Parser, amd_kernel_code_t &C, raw_ostream &Err)
AMD Kernel Code Object (amd_kernel_code_t).
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
const MCExpr * compute_pgm_rsrc1
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * group_segment_fixed_size
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * kernel_code_properties
Represents the counter values to wait for in an s_waitcnt instruction.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static const fltSemantics & IEEEhalf() LLVM_READNONE
static const fltSemantics & BFloat() LLVM_READNONE
opStatus
IEEE-754R 7: Default exception handling.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
uint32_t group_segment_fixed_size
uint32_t private_segment_fixed_size