53enum RegisterKind { IS_UNKNOWN,
IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
71 AMDGPUOperand(KindTy Kind_,
const AMDGPUAsmParser *AsmParser_)
72 :
Kind(Kind_), AsmParser(AsmParser_) {}
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
82 bool hasFPModifiers()
const {
return Abs || Neg; }
83 bool hasIntModifiers()
const {
return Sext; }
84 bool hasModifiers()
const {
return hasFPModifiers() || hasIntModifiers(); }
86 int64_t getFPModifiersOperand()
const {
93 int64_t getIntModifiersOperand()
const {
99 int64_t getModifiersOperand()
const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 &&
"fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers()) {
103 return getFPModifiersOperand();
104 }
else if (hasIntModifiers()) {
105 return getIntModifiersOperand();
187 ImmKindTyMandatoryLiteral,
201 mutable ImmKindTy
Kind;
218 bool isToken()
const override {
return Kind == Token; }
220 bool isSymbolRefExpr()
const {
221 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
224 bool isImm()
const override {
225 return Kind == Immediate;
228 void setImmKindNone()
const {
230 Imm.Kind = ImmKindTyNone;
233 void setImmKindLiteral()
const {
235 Imm.Kind = ImmKindTyLiteral;
238 void setImmKindMandatoryLiteral()
const {
240 Imm.Kind = ImmKindTyMandatoryLiteral;
243 void setImmKindConst()
const {
245 Imm.Kind = ImmKindTyConst;
248 bool IsImmKindLiteral()
const {
249 return isImm() &&
Imm.Kind == ImmKindTyLiteral;
252 bool IsImmKindMandatoryLiteral()
const {
253 return isImm() &&
Imm.Kind == ImmKindTyMandatoryLiteral;
256 bool isImmKindConst()
const {
257 return isImm() &&
Imm.Kind == ImmKindTyConst;
260 bool isInlinableImm(
MVT type)
const;
261 bool isLiteralImm(
MVT type)
const;
263 bool isRegKind()
const {
267 bool isReg()
const override {
268 return isRegKind() && !hasModifiers();
271 bool isRegOrInline(
unsigned RCID,
MVT type)
const {
272 return isRegClass(RCID) || isInlinableImm(type);
276 return isRegOrInline(RCID, type) || isLiteralImm(type);
279 bool isRegOrImmWithInt16InputMods()
const {
283 bool isRegOrImmWithIntT16InputMods()
const {
287 bool isRegOrImmWithInt32InputMods()
const {
291 bool isRegOrInlineImmWithInt16InputMods()
const {
292 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
295 bool isRegOrInlineImmWithInt32InputMods()
const {
296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
299 bool isRegOrImmWithInt64InputMods()
const {
303 bool isRegOrImmWithFP16InputMods()
const {
307 bool isRegOrImmWithFPT16InputMods()
const {
311 bool isRegOrImmWithFP32InputMods()
const {
315 bool isRegOrImmWithFP64InputMods()
const {
319 template <
bool IsFake16>
bool isRegOrInlineImmWithFP16InputMods()
const {
320 return isRegOrInline(
321 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
324 bool isRegOrInlineImmWithFP32InputMods()
const {
325 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
328 bool isPackedFP16InputMods()
const {
332 bool isVReg()
const {
333 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
334 isRegClass(AMDGPU::VReg_64RegClassID) ||
335 isRegClass(AMDGPU::VReg_96RegClassID) ||
336 isRegClass(AMDGPU::VReg_128RegClassID) ||
337 isRegClass(AMDGPU::VReg_160RegClassID) ||
338 isRegClass(AMDGPU::VReg_192RegClassID) ||
339 isRegClass(AMDGPU::VReg_256RegClassID) ||
340 isRegClass(AMDGPU::VReg_512RegClassID) ||
341 isRegClass(AMDGPU::VReg_1024RegClassID);
344 bool isVReg32()
const {
345 return isRegClass(AMDGPU::VGPR_32RegClassID);
348 bool isVReg32OrOff()
const {
349 return isOff() || isVReg32();
352 bool isNull()
const {
353 return isRegKind() &&
getReg() == AMDGPU::SGPR_NULL;
356 bool isVRegWithInputMods()
const;
357 template <
bool IsFake16>
bool isT16VRegWithInputMods()
const;
359 bool isSDWAOperand(
MVT type)
const;
360 bool isSDWAFP16Operand()
const;
361 bool isSDWAFP32Operand()
const;
362 bool isSDWAInt16Operand()
const;
363 bool isSDWAInt32Operand()
const;
365 bool isImmTy(ImmTy ImmT)
const {
369 template <ImmTy Ty>
bool isImmTy()
const {
return isImmTy(Ty); }
371 bool isImmLiteral()
const {
return isImmTy(ImmTyNone); }
373 bool isImmModifier()
const {
374 return isImm() &&
Imm.Type != ImmTyNone;
377 bool isOModSI()
const {
return isImmTy(ImmTyOModSI); }
378 bool isDMask()
const {
return isImmTy(ImmTyDMask); }
379 bool isDim()
const {
return isImmTy(ImmTyDim); }
380 bool isR128A16()
const {
return isImmTy(ImmTyR128A16); }
381 bool isOff()
const {
return isImmTy(ImmTyOff); }
382 bool isExpTgt()
const {
return isImmTy(ImmTyExpTgt); }
383 bool isOffen()
const {
return isImmTy(ImmTyOffen); }
384 bool isIdxen()
const {
return isImmTy(ImmTyIdxen); }
385 bool isAddr64()
const {
return isImmTy(ImmTyAddr64); }
386 bool isOffset()
const {
return isImmTy(ImmTyOffset); }
387 bool isOffset0()
const {
return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
388 bool isOffset1()
const {
return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
389 bool isSMEMOffsetMod()
const {
return isImmTy(ImmTySMEMOffsetMod); }
390 bool isFlatOffset()
const {
return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
391 bool isGDS()
const {
return isImmTy(ImmTyGDS); }
392 bool isLDS()
const {
return isImmTy(ImmTyLDS); }
393 bool isCPol()
const {
return isImmTy(ImmTyCPol); }
394 bool isIndexKey8bit()
const {
return isImmTy(ImmTyIndexKey8bit); }
395 bool isIndexKey16bit()
const {
return isImmTy(ImmTyIndexKey16bit); }
396 bool isTFE()
const {
return isImmTy(ImmTyTFE); }
397 bool isFORMAT()
const {
return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
398 bool isDppBankMask()
const {
return isImmTy(ImmTyDppBankMask); }
399 bool isDppRowMask()
const {
return isImmTy(ImmTyDppRowMask); }
400 bool isDppBoundCtrl()
const {
return isImmTy(ImmTyDppBoundCtrl); }
401 bool isDppFI()
const {
return isImmTy(ImmTyDppFI); }
402 bool isSDWADstSel()
const {
return isImmTy(ImmTySDWADstSel); }
403 bool isSDWASrc0Sel()
const {
return isImmTy(ImmTySDWASrc0Sel); }
404 bool isSDWASrc1Sel()
const {
return isImmTy(ImmTySDWASrc1Sel); }
405 bool isSDWADstUnused()
const {
return isImmTy(ImmTySDWADstUnused); }
406 bool isInterpSlot()
const {
return isImmTy(ImmTyInterpSlot); }
407 bool isInterpAttr()
const {
return isImmTy(ImmTyInterpAttr); }
408 bool isInterpAttrChan()
const {
return isImmTy(ImmTyInterpAttrChan); }
409 bool isOpSel()
const {
return isImmTy(ImmTyOpSel); }
410 bool isOpSelHi()
const {
return isImmTy(ImmTyOpSelHi); }
411 bool isNegLo()
const {
return isImmTy(ImmTyNegLo); }
412 bool isNegHi()
const {
return isImmTy(ImmTyNegHi); }
414 bool isRegOrImm()
const {
418 bool isRegClass(
unsigned RCID)
const;
422 bool isRegOrInlineNoMods(
unsigned RCID,
MVT type)
const {
423 return isRegOrInline(RCID, type) && !hasModifiers();
426 bool isSCSrcB16()
const {
427 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
430 bool isSCSrcV2B16()
const {
434 bool isSCSrc_b32()
const {
435 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
438 bool isSCSrc_b64()
const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
442 bool isBoolReg()
const;
444 bool isSCSrcF16()
const {
445 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
448 bool isSCSrcV2F16()
const {
452 bool isSCSrcF32()
const {
453 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
456 bool isSCSrcF64()
const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
460 bool isSSrc_b32()
const {
461 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
464 bool isSSrc_b16()
const {
return isSCSrcB16() || isLiteralImm(MVT::i16); }
466 bool isSSrcV2B16()
const {
471 bool isSSrc_b64()
const {
474 return isSCSrc_b64() || isLiteralImm(MVT::i64);
477 bool isSSrc_f32()
const {
478 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
481 bool isSSrcF64()
const {
return isSCSrc_b64() || isLiteralImm(MVT::f64); }
483 bool isSSrc_bf16()
const {
return isSCSrcB16() || isLiteralImm(MVT::bf16); }
485 bool isSSrc_f16()
const {
return isSCSrcB16() || isLiteralImm(MVT::f16); }
487 bool isSSrcV2F16()
const {
492 bool isSSrcV2FP32()
const {
497 bool isSCSrcV2FP32()
const {
502 bool isSSrcV2INT32()
const {
507 bool isSCSrcV2INT32()
const {
509 return isSCSrc_b32();
512 bool isSSrcOrLds_b32()
const {
513 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
514 isLiteralImm(MVT::i32) || isExpr();
517 bool isVCSrc_b32()
const {
518 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
521 bool isVCSrcB64()
const {
522 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
525 bool isVCSrcTB16()
const {
526 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
529 bool isVCSrcTB16_Lo128()
const {
530 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
533 bool isVCSrcFake16B16_Lo128()
const {
534 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
537 bool isVCSrc_b16()
const {
538 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
541 bool isVCSrc_v2b16()
const {
return isVCSrc_b16(); }
543 bool isVCSrc_f32()
const {
544 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
547 bool isVCSrcF64()
const {
548 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
551 bool isVCSrcTBF16()
const {
552 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
555 bool isVCSrcTF16()
const {
556 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
559 bool isVCSrcTBF16_Lo128()
const {
560 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
563 bool isVCSrcTF16_Lo128()
const {
564 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
567 bool isVCSrcFake16BF16_Lo128()
const {
568 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
571 bool isVCSrcFake16F16_Lo128()
const {
572 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
575 bool isVCSrc_bf16()
const {
576 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
579 bool isVCSrc_f16()
const {
580 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
583 bool isVCSrc_v2bf16()
const {
return isVCSrc_bf16(); }
585 bool isVCSrc_v2f16()
const {
return isVCSrc_f16(); }
587 bool isVSrc_b32()
const {
588 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
591 bool isVSrc_b64()
const {
return isVCSrcF64() || isLiteralImm(MVT::i64); }
593 bool isVSrcT_b16()
const {
return isVCSrcTB16() || isLiteralImm(MVT::i16); }
595 bool isVSrcT_b16_Lo128()
const {
596 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
599 bool isVSrcFake16_b16_Lo128()
const {
600 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
603 bool isVSrc_b16()
const {
return isVCSrc_b16() || isLiteralImm(MVT::i16); }
605 bool isVSrc_v2b16()
const {
return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
607 bool isVCSrcV2FP32()
const {
611 bool isVSrc_v2f32()
const {
return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
613 bool isVCSrcV2INT32()
const {
617 bool isVSrc_v2b32()
const {
return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
619 bool isVSrc_f32()
const {
620 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
623 bool isVSrc_f64()
const {
return isVCSrcF64() || isLiteralImm(MVT::f64); }
625 bool isVSrcT_bf16()
const {
return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
627 bool isVSrcT_f16()
const {
return isVCSrcTF16() || isLiteralImm(MVT::f16); }
629 bool isVSrcT_bf16_Lo128()
const {
630 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
633 bool isVSrcT_f16_Lo128()
const {
634 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
637 bool isVSrcFake16_bf16_Lo128()
const {
638 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
641 bool isVSrcFake16_f16_Lo128()
const {
642 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
645 bool isVSrc_bf16()
const {
return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
647 bool isVSrc_f16()
const {
return isVCSrc_f16() || isLiteralImm(MVT::f16); }
649 bool isVSrc_v2bf16()
const {
650 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
653 bool isVSrc_v2f16()
const {
return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
655 bool isVISrcB32()
const {
656 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
659 bool isVISrcB16()
const {
660 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
663 bool isVISrcV2B16()
const {
667 bool isVISrcF32()
const {
668 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
671 bool isVISrcF16()
const {
672 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
675 bool isVISrcV2F16()
const {
676 return isVISrcF16() || isVISrcB32();
679 bool isVISrc_64_bf16()
const {
680 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
683 bool isVISrc_64_f16()
const {
684 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
687 bool isVISrc_64_b32()
const {
688 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
691 bool isVISrc_64B64()
const {
692 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
695 bool isVISrc_64_f64()
const {
696 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
699 bool isVISrc_64V2FP32()
const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
703 bool isVISrc_64V2INT32()
const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
707 bool isVISrc_256_b32()
const {
708 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
711 bool isVISrc_256_f32()
const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
715 bool isVISrc_256B64()
const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
719 bool isVISrc_256_f64()
const {
720 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
723 bool isVISrc_128B16()
const {
724 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
727 bool isVISrc_128V2B16()
const {
728 return isVISrc_128B16();
731 bool isVISrc_128_b32()
const {
732 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
735 bool isVISrc_128_f32()
const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
739 bool isVISrc_256V2FP32()
const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
743 bool isVISrc_256V2INT32()
const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
747 bool isVISrc_512_b32()
const {
748 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
751 bool isVISrc_512B16()
const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
755 bool isVISrc_512V2B16()
const {
756 return isVISrc_512B16();
759 bool isVISrc_512_f32()
const {
760 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
763 bool isVISrc_512F16()
const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
767 bool isVISrc_512V2F16()
const {
768 return isVISrc_512F16() || isVISrc_512_b32();
771 bool isVISrc_1024_b32()
const {
772 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
775 bool isVISrc_1024B16()
const {
776 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
779 bool isVISrc_1024V2B16()
const {
780 return isVISrc_1024B16();
783 bool isVISrc_1024_f32()
const {
784 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
787 bool isVISrc_1024F16()
const {
788 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
791 bool isVISrc_1024V2F16()
const {
792 return isVISrc_1024F16() || isVISrc_1024_b32();
795 bool isAISrcB32()
const {
796 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
799 bool isAISrcB16()
const {
800 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
803 bool isAISrcV2B16()
const {
807 bool isAISrcF32()
const {
808 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
811 bool isAISrcF16()
const {
812 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
815 bool isAISrcV2F16()
const {
816 return isAISrcF16() || isAISrcB32();
819 bool isAISrc_64B64()
const {
820 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
823 bool isAISrc_64_f64()
const {
824 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
827 bool isAISrc_128_b32()
const {
828 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
831 bool isAISrc_128B16()
const {
832 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
835 bool isAISrc_128V2B16()
const {
836 return isAISrc_128B16();
839 bool isAISrc_128_f32()
const {
840 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
843 bool isAISrc_128F16()
const {
844 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
847 bool isAISrc_128V2F16()
const {
848 return isAISrc_128F16() || isAISrc_128_b32();
851 bool isVISrc_128_bf16()
const {
852 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
855 bool isVISrc_128_f16()
const {
856 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
859 bool isVISrc_128V2F16()
const {
860 return isVISrc_128_f16() || isVISrc_128_b32();
863 bool isAISrc_256B64()
const {
864 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
867 bool isAISrc_256_f64()
const {
868 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
871 bool isAISrc_512_b32()
const {
872 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
875 bool isAISrc_512B16()
const {
876 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
879 bool isAISrc_512V2B16()
const {
880 return isAISrc_512B16();
883 bool isAISrc_512_f32()
const {
884 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
887 bool isAISrc_512F16()
const {
888 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
891 bool isAISrc_512V2F16()
const {
892 return isAISrc_512F16() || isAISrc_512_b32();
895 bool isAISrc_1024_b32()
const {
896 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
899 bool isAISrc_1024B16()
const {
900 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
903 bool isAISrc_1024V2B16()
const {
904 return isAISrc_1024B16();
907 bool isAISrc_1024_f32()
const {
908 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
911 bool isAISrc_1024F16()
const {
912 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
915 bool isAISrc_1024V2F16()
const {
916 return isAISrc_1024F16() || isAISrc_1024_b32();
919 bool isKImmFP32()
const {
920 return isLiteralImm(MVT::f32);
923 bool isKImmFP16()
const {
924 return isLiteralImm(MVT::f16);
927 bool isMem()
const override {
931 bool isExpr()
const {
935 bool isSOPPBrTarget()
const {
return isExpr() ||
isImm(); }
937 bool isSWaitCnt()
const;
938 bool isDepCtr()
const;
939 bool isSDelayALU()
const;
940 bool isHwreg()
const;
941 bool isSendMsg()
const;
942 bool isSplitBarrier()
const;
943 bool isSwizzle()
const;
944 bool isSMRDOffset8()
const;
945 bool isSMEMOffset()
const;
946 bool isSMRDLiteralOffset()
const;
948 bool isDPPCtrl()
const;
952 bool isGPRIdxMode()
const;
953 bool isS16Imm()
const;
954 bool isU16Imm()
const;
955 bool isEndpgm()
const;
956 bool isWaitVDST()
const;
957 bool isWaitEXP()
const;
958 bool isWaitVAVDst()
const;
959 bool isWaitVMVSrc()
const;
961 auto getPredicate(std::function<
bool(
const AMDGPUOperand &
Op)>
P)
const {
962 return std::bind(
P, *
this);
970 int64_t getImm()
const {
975 void setImm(int64_t Val) {
980 ImmTy getImmTy()
const {
990 SMLoc getStartLoc()
const override {
994 SMLoc getEndLoc()
const override {
999 return SMRange(StartLoc, EndLoc);
1002 Modifiers getModifiers()
const {
1003 assert(isRegKind() || isImmTy(ImmTyNone));
1004 return isRegKind() ?
Reg.Mods :
Imm.Mods;
1007 void setModifiers(Modifiers Mods) {
1008 assert(isRegKind() || isImmTy(ImmTyNone));
1015 bool hasModifiers()
const {
1016 return getModifiers().hasModifiers();
1019 bool hasFPModifiers()
const {
1020 return getModifiers().hasFPModifiers();
1023 bool hasIntModifiers()
const {
1024 return getModifiers().hasIntModifiers();
1029 void addImmOperands(
MCInst &Inst,
unsigned N,
bool ApplyModifiers =
true)
const;
1031 void addLiteralImmOperand(
MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const;
1033 void addRegOperands(
MCInst &Inst,
unsigned N)
const;
1035 void addRegOrImmOperands(
MCInst &Inst,
unsigned N)
const {
1037 addRegOperands(Inst,
N);
1039 addImmOperands(Inst,
N);
1042 void addRegOrImmWithInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1043 Modifiers Mods = getModifiers();
1046 addRegOperands(Inst,
N);
1048 addImmOperands(Inst,
N,
false);
1052 void addRegOrImmWithFPInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1053 assert(!hasIntModifiers());
1054 addRegOrImmWithInputModsOperands(Inst,
N);
1057 void addRegOrImmWithIntInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1058 assert(!hasFPModifiers());
1059 addRegOrImmWithInputModsOperands(Inst,
N);
1062 void addRegWithInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1063 Modifiers Mods = getModifiers();
1066 addRegOperands(Inst,
N);
1069 void addRegWithFPInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1070 assert(!hasIntModifiers());
1071 addRegWithInputModsOperands(Inst,
N);
1074 void addRegWithIntInputModsOperands(
MCInst &Inst,
unsigned N)
const {
1075 assert(!hasFPModifiers());
1076 addRegWithInputModsOperands(Inst,
N);
1082 case ImmTyNone:
OS <<
"None";
break;
1083 case ImmTyGDS:
OS <<
"GDS";
break;
1084 case ImmTyLDS:
OS <<
"LDS";
break;
1085 case ImmTyOffen:
OS <<
"Offen";
break;
1086 case ImmTyIdxen:
OS <<
"Idxen";
break;
1087 case ImmTyAddr64:
OS <<
"Addr64";
break;
1088 case ImmTyOffset:
OS <<
"Offset";
break;
1089 case ImmTyInstOffset:
OS <<
"InstOffset";
break;
1090 case ImmTyOffset0:
OS <<
"Offset0";
break;
1091 case ImmTyOffset1:
OS <<
"Offset1";
break;
1092 case ImmTySMEMOffsetMod:
OS <<
"SMEMOffsetMod";
break;
1093 case ImmTyCPol:
OS <<
"CPol";
break;
1094 case ImmTyIndexKey8bit:
OS <<
"index_key";
break;
1095 case ImmTyIndexKey16bit:
OS <<
"index_key";
break;
1096 case ImmTyTFE:
OS <<
"TFE";
break;
1097 case ImmTyD16:
OS <<
"D16";
break;
1098 case ImmTyFORMAT:
OS <<
"FORMAT";
break;
1099 case ImmTyClampSI:
OS <<
"ClampSI";
break;
1100 case ImmTyOModSI:
OS <<
"OModSI";
break;
1101 case ImmTyDPP8:
OS <<
"DPP8";
break;
1102 case ImmTyDppCtrl:
OS <<
"DppCtrl";
break;
1103 case ImmTyDppRowMask:
OS <<
"DppRowMask";
break;
1104 case ImmTyDppBankMask:
OS <<
"DppBankMask";
break;
1105 case ImmTyDppBoundCtrl:
OS <<
"DppBoundCtrl";
break;
1106 case ImmTyDppFI:
OS <<
"DppFI";
break;
1107 case ImmTySDWADstSel:
OS <<
"SDWADstSel";
break;
1108 case ImmTySDWASrc0Sel:
OS <<
"SDWASrc0Sel";
break;
1109 case ImmTySDWASrc1Sel:
OS <<
"SDWASrc1Sel";
break;
1110 case ImmTySDWADstUnused:
OS <<
"SDWADstUnused";
break;
1111 case ImmTyDMask:
OS <<
"DMask";
break;
1112 case ImmTyDim:
OS <<
"Dim";
break;
1113 case ImmTyUNorm:
OS <<
"UNorm";
break;
1114 case ImmTyDA:
OS <<
"DA";
break;
1115 case ImmTyR128A16:
OS <<
"R128A16";
break;
1116 case ImmTyA16:
OS <<
"A16";
break;
1117 case ImmTyLWE:
OS <<
"LWE";
break;
1118 case ImmTyOff:
OS <<
"Off";
break;
1119 case ImmTyExpTgt:
OS <<
"ExpTgt";
break;
1120 case ImmTyExpCompr:
OS <<
"ExpCompr";
break;
1121 case ImmTyExpVM:
OS <<
"ExpVM";
break;
1122 case ImmTyHwreg:
OS <<
"Hwreg";
break;
1123 case ImmTySendMsg:
OS <<
"SendMsg";
break;
1124 case ImmTyInterpSlot:
OS <<
"InterpSlot";
break;
1125 case ImmTyInterpAttr:
OS <<
"InterpAttr";
break;
1126 case ImmTyInterpAttrChan:
OS <<
"InterpAttrChan";
break;
1127 case ImmTyOpSel:
OS <<
"OpSel";
break;
1128 case ImmTyOpSelHi:
OS <<
"OpSelHi";
break;
1129 case ImmTyNegLo:
OS <<
"NegLo";
break;
1130 case ImmTyNegHi:
OS <<
"NegHi";
break;
1131 case ImmTySwizzle:
OS <<
"Swizzle";
break;
1132 case ImmTyGprIdxMode:
OS <<
"GprIdxMode";
break;
1133 case ImmTyHigh:
OS <<
"High";
break;
1134 case ImmTyBLGP:
OS <<
"BLGP";
break;
1135 case ImmTyCBSZ:
OS <<
"CBSZ";
break;
1136 case ImmTyABID:
OS <<
"ABID";
break;
1137 case ImmTyEndpgm:
OS <<
"Endpgm";
break;
1138 case ImmTyWaitVDST:
OS <<
"WaitVDST";
break;
1139 case ImmTyWaitEXP:
OS <<
"WaitEXP";
break;
1140 case ImmTyWaitVAVDst:
OS <<
"WaitVAVDst";
break;
1141 case ImmTyWaitVMVSrc:
OS <<
"WaitVMVSrc";
break;
1149 OS <<
"<register " <<
getReg() <<
" mods: " <<
Reg.Mods <<
'>';
1152 OS <<
'<' << getImm();
1153 if (getImmTy() != ImmTyNone) {
1154 OS <<
" type: "; printImmTy(
OS, getImmTy());
1156 OS <<
" mods: " <<
Imm.Mods <<
'>';
1159 OS <<
'\'' << getToken() <<
'\'';
1162 OS <<
"<expr " << *Expr <<
'>';
1167 static AMDGPUOperand::Ptr CreateImm(
const AMDGPUAsmParser *AsmParser,
1168 int64_t Val,
SMLoc Loc,
1169 ImmTy
Type = ImmTyNone,
1170 bool IsFPImm =
false) {
1171 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1173 Op->Imm.IsFPImm = IsFPImm;
1174 Op->Imm.Kind = ImmKindTyNone;
1176 Op->Imm.Mods = Modifiers();
1182 static AMDGPUOperand::Ptr CreateToken(
const AMDGPUAsmParser *AsmParser,
1184 bool HasExplicitEncodingSize =
true) {
1185 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1186 Res->Tok.Data = Str.data();
1187 Res->Tok.Length = Str.size();
1188 Res->StartLoc = Loc;
1193 static AMDGPUOperand::Ptr CreateReg(
const AMDGPUAsmParser *AsmParser,
1194 unsigned RegNo,
SMLoc S,
1196 auto Op = std::make_unique<AMDGPUOperand>(
Register, AsmParser);
1197 Op->Reg.RegNo = RegNo;
1198 Op->Reg.Mods = Modifiers();
1204 static AMDGPUOperand::Ptr CreateExpr(
const AMDGPUAsmParser *AsmParser,
1206 auto Op = std::make_unique<AMDGPUOperand>(
Expression, AsmParser);
1215 OS <<
"abs:" << Mods.Abs <<
" neg: " << Mods.Neg <<
" sext:" << Mods.Sext;
1226class KernelScopeInfo {
1227 int SgprIndexUnusedMin = -1;
1228 int VgprIndexUnusedMin = -1;
1229 int AgprIndexUnusedMin = -1;
1233 void usesSgprAt(
int i) {
1234 if (i >= SgprIndexUnusedMin) {
1235 SgprIndexUnusedMin = ++i;
1244 void usesVgprAt(
int i) {
1245 if (i >= VgprIndexUnusedMin) {
1246 VgprIndexUnusedMin = ++i;
1251 VgprIndexUnusedMin);
1257 void usesAgprAt(
int i) {
1262 if (i >= AgprIndexUnusedMin) {
1263 AgprIndexUnusedMin = ++i;
1273 VgprIndexUnusedMin);
1280 KernelScopeInfo() =
default;
1286 usesSgprAt(SgprIndexUnusedMin = -1);
1287 usesVgprAt(VgprIndexUnusedMin = -1);
1289 usesAgprAt(AgprIndexUnusedMin = -1);
1293 void usesRegister(RegisterKind RegKind,
unsigned DwordRegIndex,
1294 unsigned RegWidth) {
1297 usesSgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1300 usesAgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1303 usesVgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1314 unsigned ForcedEncodingSize = 0;
1315 bool ForcedDPP =
false;
1316 bool ForcedSDWA =
false;
1317 KernelScopeInfo KernelScope;
1322#define GET_ASSEMBLER_HEADER
1323#include "AMDGPUGenAsmMatcher.inc"
1328 bool ParseAsAbsoluteExpression(
uint32_t &Ret);
1329 bool OutOfRangeError(
SMRange Range);
1345 bool calculateGPRBlocks(
const FeatureBitset &Features,
bool VCCUsed,
1346 bool FlatScrUsed,
bool XNACKUsed,
1347 std::optional<bool> EnableWavefrontSize32,
1348 unsigned NextFreeVGPR,
SMRange VGPRRange,
1349 unsigned NextFreeSGPR,
SMRange SGPRRange,
1350 unsigned &VGPRBlocks,
unsigned &SGPRBlocks);
1351 bool ParseDirectiveAMDGCNTarget();
1352 bool ParseDirectiveAMDHSACodeObjectVersion();
1353 bool ParseDirectiveAMDHSAKernel();
1355 bool ParseDirectiveAMDKernelCodeT();
1358 bool ParseDirectiveAMDGPUHsaKernel();
1360 bool ParseDirectiveISAVersion();
1361 bool ParseDirectiveHSAMetadata();
1362 bool ParseDirectivePALMetadataBegin();
1363 bool ParseDirectivePALMetadata();
1364 bool ParseDirectiveAMDGPULDS();
1368 bool ParseToEndDirective(
const char *AssemblerDirectiveBegin,
1369 const char *AssemblerDirectiveEnd,
1370 std::string &CollectString);
1372 bool AddNextRegisterToList(
unsigned& Reg,
unsigned& RegWidth,
1373 RegisterKind RegKind,
unsigned Reg1,
SMLoc Loc);
1374 bool ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
1375 unsigned &RegNum,
unsigned &RegWidth,
1376 bool RestoreOnFailure =
false);
1377 bool ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
1378 unsigned &RegNum,
unsigned &RegWidth,
1380 unsigned ParseRegularReg(RegisterKind &RegKind,
unsigned &RegNum,
1383 unsigned ParseSpecialReg(RegisterKind &RegKind,
unsigned &RegNum,
1386 unsigned ParseRegList(RegisterKind &RegKind,
unsigned &RegNum,
1388 bool ParseRegRange(
unsigned& Num,
unsigned& Width);
1389 unsigned getRegularReg(RegisterKind RegKind,
unsigned RegNum,
unsigned SubReg,
1390 unsigned RegWidth,
SMLoc Loc);
1394 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1395 void initializeGprCountSymbol(RegisterKind RegKind);
1396 bool updateGprCountSymbols(RegisterKind RegKind,
unsigned DwordRegIndex,
1402 enum AMDGPUMatchResultTy {
1403 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1406 OperandMode_Default,
1410 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1418 if (getFeatureBits().
none()) {
1450 initializeGprCountSymbol(IS_VGPR);
1451 initializeGprCountSymbol(IS_SGPR);
1524 bool hasInv2PiInlineImm()
const {
1525 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1528 bool hasFlatOffsets()
const {
1529 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1533 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1536 bool hasSGPR102_SGPR103()
const {
1540 bool hasSGPR104_SGPR105()
const {
return isGFX10Plus(); }
1542 bool hasIntClamp()
const {
1543 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1546 bool hasPartialNSAEncoding()
const {
1547 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1579 void setForcedEncodingSize(
unsigned Size) { ForcedEncodingSize =
Size; }
1580 void setForcedDPP(
bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1581 void setForcedSDWA(
bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1583 unsigned getForcedEncodingSize()
const {
return ForcedEncodingSize; }
1584 bool isForcedVOP3()
const {
return ForcedEncodingSize == 64; }
1585 bool isForcedDPP()
const {
return ForcedDPP; }
1586 bool isForcedSDWA()
const {
return ForcedSDWA; }
1588 StringRef getMatchedVariantName()
const;
1590 std::unique_ptr<AMDGPUOperand>
parseRegister(
bool RestoreOnFailure =
false);
1592 bool RestoreOnFailure);
1595 SMLoc &EndLoc)
override;
1598 unsigned Kind)
override;
1602 bool MatchingInlineAsm)
override;
1605 OperandMode Mode = OperandMode_Default);
1613 ParseStatus parseIntWithPrefix(
const char *Prefix, int64_t &
Int);
1617 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1618 std::function<
bool(int64_t &)> ConvertResult =
nullptr);
1622 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1623 bool (*ConvertResult)(int64_t &) =
nullptr);
1627 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1636 bool isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1637 bool isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1638 bool isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1639 bool isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1640 bool parseSP3NegModifier();
1642 bool HasLit =
false);
1645 bool HasLit =
false);
1647 bool AllowImm =
true);
1649 bool AllowImm =
true);
1654 AMDGPUOperand::ImmTy ImmTy);
1665 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1670 bool tryParseFmt(
const char *Pref, int64_t MaxVal, int64_t &Val);
1671 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt,
StringRef FormatStr,
SMLoc Loc);
1675 bool parseCnt(int64_t &IntVal);
1678 bool parseDepCtr(int64_t &IntVal,
unsigned &Mask);
1682 bool parseDelay(int64_t &Delay);
1688 struct OperandInfoTy {
1691 bool IsSymbolic =
false;
1692 bool IsDefined =
false;
1694 OperandInfoTy(int64_t Val) : Val(Val) {}
1697 struct StructuredOpField : OperandInfoTy {
1701 bool IsDefined =
false;
1706 virtual ~StructuredOpField() =
default;
1708 bool Error(AMDGPUAsmParser &Parser,
const Twine &Err)
const {
1709 Parser.Error(Loc,
"invalid " +
Desc +
": " + Err);
1713 virtual bool validate(AMDGPUAsmParser &Parser)
const {
1715 return Error(Parser,
"not supported on this GPU");
1717 return Error(Parser,
"only " +
Twine(Width) +
"-bit values are legal");
1725 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &
Op, OperandInfoTy &Stream);
1726 bool validateSendMsg(
const OperandInfoTy &Msg,
1727 const OperandInfoTy &
Op,
1728 const OperandInfoTy &Stream);
1731 OperandInfoTy &Width);
1737 SMLoc getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
1742 bool SearchMandatoryLiterals =
false)
const;
1751 bool validateSOPLiteral(
const MCInst &Inst)
const;
1753 bool validateVOPDRegBankConstraints(
const MCInst &Inst,
1755 bool validateIntClampSupported(
const MCInst &Inst);
1756 bool validateMIMGAtomicDMask(
const MCInst &Inst);
1757 bool validateMIMGGatherDMask(
const MCInst &Inst);
1759 bool validateMIMGDataSize(
const MCInst &Inst,
const SMLoc &IDLoc);
1760 bool validateMIMGAddrSize(
const MCInst &Inst,
const SMLoc &IDLoc);
1761 bool validateMIMGD16(
const MCInst &Inst);
1762 bool validateMIMGMSAA(
const MCInst &Inst);
1763 bool validateOpSel(
const MCInst &Inst);
1766 bool validateVccOperand(
unsigned Reg)
const;
1771 bool validateAGPRLdSt(
const MCInst &Inst)
const;
1772 bool validateVGPRAlign(
const MCInst &Inst)
const;
1776 bool validateDivScale(
const MCInst &Inst);
1779 const SMLoc &IDLoc);
1781 const unsigned CPol);
1784 std::optional<StringRef> validateLdsDirect(
const MCInst &Inst);
1785 unsigned getConstantBusLimit(
unsigned Opcode)
const;
1786 bool usesConstantBus(
const MCInst &Inst,
unsigned OpIdx);
1787 bool isInlineConstant(
const MCInst &Inst,
unsigned OpIdx)
const;
1788 unsigned findImplicitSGPRReadInVOP(
const MCInst &Inst)
const;
1814 AsmToken peekToken(
bool ShouldSkipSpace =
true);
1816 SMLoc getLoc()
const;
1820 void onBeginOfFile()
override;
1821 bool parsePrimaryExpr(
const MCExpr *&Res,
SMLoc &EndLoc)
override;
1832 bool parseSwizzleOperand(int64_t &
Op,
1833 const unsigned MinVal,
1834 const unsigned MaxVal,
1837 bool parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
1838 const unsigned MinVal,
1839 const unsigned MaxVal,
1842 bool parseSwizzleOffset(int64_t &Imm);
1843 bool parseSwizzleMacro(int64_t &Imm);
1844 bool parseSwizzleQuadPerm(int64_t &Imm);
1845 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1846 bool parseSwizzleBroadcast(int64_t &Imm);
1847 bool parseSwizzleSwap(int64_t &Imm);
1848 bool parseSwizzleReverse(int64_t &Imm);
1851 int64_t parseGPRIdxMacro();
1859 OptionalImmIndexMap &OptionalIdx);
1867 OptionalImmIndexMap &OptionalIdx);
1869 OptionalImmIndexMap &OptionalIdx);
1874 bool parseDimId(
unsigned &Encoding);
1876 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1880 int64_t parseDPPCtrlSel(
StringRef Ctrl);
1881 int64_t parseDPPCtrlPerm();
1887 bool IsDPP8 =
false);
1893 AMDGPUOperand::ImmTy
Type);
1902 bool SkipDstVcc =
false,
1903 bool SkipSrcVcc =
false);
1916 return &APFloat::IEEEsingle();
1918 return &APFloat::IEEEdouble();
1920 return &APFloat::IEEEhalf();
1953 return &APFloat::IEEEsingle();
1959 return &APFloat::IEEEdouble();
1968 return &APFloat::IEEEhalf();
1976 return &APFloat::BFloat();
1991 APFloat::rmNearestTiesToEven,
1994 if (
Status != APFloat::opOK &&
1996 ((
Status & APFloat::opOverflow) != 0 ||
1997 (
Status & APFloat::opUnderflow) != 0)) {
2020bool AMDGPUOperand::isInlinableImm(
MVT type)
const {
2030 if (!isImmTy(ImmTyNone)) {
2041 if (type == MVT::f64 || type == MVT::i64) {
2043 AsmParser->hasInv2PiInlineImm());
2065 APFloat::rmNearestTiesToEven, &Lost);
2072 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2074 AsmParser->hasInv2PiInlineImm());
2079 static_cast<int32_t
>(FPLiteral.bitcastToAPInt().getZExtValue()),
2080 AsmParser->hasInv2PiInlineImm());
2084 if (type == MVT::f64 || type == MVT::i64) {
2086 AsmParser->hasInv2PiInlineImm());
2095 static_cast<int16_t
>(
Literal.getLoBits(16).getSExtValue()),
2096 type, AsmParser->hasInv2PiInlineImm());
2100 static_cast<int32_t
>(
Literal.getLoBits(32).getZExtValue()),
2101 AsmParser->hasInv2PiInlineImm());
2104bool AMDGPUOperand::isLiteralImm(
MVT type)
const {
2106 if (!isImmTy(ImmTyNone)) {
2113 if (type == MVT::f64 && hasFPModifiers()) {
2130 if (type == MVT::f64) {
2135 if (type == MVT::i64) {
2148 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2149 : (type == MVT::v2i16) ? MVT::f32
2150 : (type == MVT::v2f32) ? MVT::f32
2157bool AMDGPUOperand::isRegClass(
unsigned RCID)
const {
2158 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(
getReg());
2161bool AMDGPUOperand::isVRegWithInputMods()
const {
2162 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2164 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2165 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2168template <
bool IsFake16>
bool AMDGPUOperand::isT16VRegWithInputMods()
const {
2169 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2170 : AMDGPU::VGPR_16_Lo128RegClassID);
2173bool AMDGPUOperand::isSDWAOperand(
MVT type)
const {
2174 if (AsmParser->isVI())
2176 else if (AsmParser->isGFX9Plus())
2177 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2182bool AMDGPUOperand::isSDWAFP16Operand()
const {
2183 return isSDWAOperand(MVT::f16);
2186bool AMDGPUOperand::isSDWAFP32Operand()
const {
2187 return isSDWAOperand(MVT::f32);
2190bool AMDGPUOperand::isSDWAInt16Operand()
const {
2191 return isSDWAOperand(MVT::i16);
2194bool AMDGPUOperand::isSDWAInt32Operand()
const {
2195 return isSDWAOperand(MVT::i32);
2198bool AMDGPUOperand::isBoolReg()
const {
2199 auto FB = AsmParser->getFeatureBits();
2200 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2201 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2206 assert(isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2221void AMDGPUOperand::addImmOperands(
MCInst &Inst,
unsigned N,
bool ApplyModifiers)
const {
2229 addLiteralImmOperand(Inst,
Imm.Val,
2231 isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2233 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2239void AMDGPUOperand::addLiteralImmOperand(
MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const {
2240 const auto& InstDesc = AsmParser->getMII()->get(Inst.
getOpcode());
2245 if (ApplyModifiers) {
2248 Val = applyInputFPModifiers(Val,
Size);
2252 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2262 AsmParser->hasInv2PiInlineImm())) {
2271 if (
Literal.getLoBits(32) != 0) {
2272 const_cast<AMDGPUAsmParser *
>(AsmParser)->
Warning(Inst.
getLoc(),
2273 "Can't encode literal as exact 64-bit floating-point operand. "
2274 "Low 32-bits will be set to zero");
2275 Val &= 0xffffffff00000000u;
2279 setImmKindLiteral();
2295 if (AsmParser->hasInv2PiInlineImm() &&
Literal == 0x3fc45f306725feed) {
2301 setImmKindLiteral();
2337 APFloat::rmNearestTiesToEven, &lost);
2341 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2344 setImmKindMandatoryLiteral();
2346 setImmKindLiteral();
2377 AsmParser->hasInv2PiInlineImm())) {
2384 setImmKindLiteral();
2402 setImmKindLiteral();
2416 setImmKindLiteral();
2425 AsmParser->hasInv2PiInlineImm())) {
2432 setImmKindLiteral();
2441 AsmParser->hasInv2PiInlineImm())) {
2448 setImmKindLiteral();
2462 AsmParser->hasInv2PiInlineImm()));
2472 AsmParser->hasInv2PiInlineImm()));
2480 setImmKindMandatoryLiteral();
2484 setImmKindMandatoryLiteral();
2491void AMDGPUOperand::addRegOperands(
MCInst &Inst,
unsigned N)
const {
2495bool AMDGPUOperand::isInlineValue()
const {
2504 if (Is == IS_VGPR) {
2508 return AMDGPU::VGPR_32RegClassID;
2510 return AMDGPU::VReg_64RegClassID;
2512 return AMDGPU::VReg_96RegClassID;
2514 return AMDGPU::VReg_128RegClassID;
2516 return AMDGPU::VReg_160RegClassID;
2518 return AMDGPU::VReg_192RegClassID;
2520 return AMDGPU::VReg_224RegClassID;
2522 return AMDGPU::VReg_256RegClassID;
2524 return AMDGPU::VReg_288RegClassID;
2526 return AMDGPU::VReg_320RegClassID;
2528 return AMDGPU::VReg_352RegClassID;
2530 return AMDGPU::VReg_384RegClassID;
2532 return AMDGPU::VReg_512RegClassID;
2534 return AMDGPU::VReg_1024RegClassID;
2536 }
else if (Is == IS_TTMP) {
2540 return AMDGPU::TTMP_32RegClassID;
2542 return AMDGPU::TTMP_64RegClassID;
2544 return AMDGPU::TTMP_128RegClassID;
2546 return AMDGPU::TTMP_256RegClassID;
2548 return AMDGPU::TTMP_512RegClassID;
2550 }
else if (Is == IS_SGPR) {
2554 return AMDGPU::SGPR_32RegClassID;
2556 return AMDGPU::SGPR_64RegClassID;
2558 return AMDGPU::SGPR_96RegClassID;
2560 return AMDGPU::SGPR_128RegClassID;
2562 return AMDGPU::SGPR_160RegClassID;
2564 return AMDGPU::SGPR_192RegClassID;
2566 return AMDGPU::SGPR_224RegClassID;
2568 return AMDGPU::SGPR_256RegClassID;
2570 return AMDGPU::SGPR_288RegClassID;
2572 return AMDGPU::SGPR_320RegClassID;
2574 return AMDGPU::SGPR_352RegClassID;
2576 return AMDGPU::SGPR_384RegClassID;
2578 return AMDGPU::SGPR_512RegClassID;
2580 }
else if (Is == IS_AGPR) {
2584 return AMDGPU::AGPR_32RegClassID;
2586 return AMDGPU::AReg_64RegClassID;
2588 return AMDGPU::AReg_96RegClassID;
2590 return AMDGPU::AReg_128RegClassID;
2592 return AMDGPU::AReg_160RegClassID;
2594 return AMDGPU::AReg_192RegClassID;
2596 return AMDGPU::AReg_224RegClassID;
2598 return AMDGPU::AReg_256RegClassID;
2600 return AMDGPU::AReg_288RegClassID;
2602 return AMDGPU::AReg_320RegClassID;
2604 return AMDGPU::AReg_352RegClassID;
2606 return AMDGPU::AReg_384RegClassID;
2608 return AMDGPU::AReg_512RegClassID;
2610 return AMDGPU::AReg_1024RegClassID;
2618 .
Case(
"exec", AMDGPU::EXEC)
2619 .
Case(
"vcc", AMDGPU::VCC)
2620 .
Case(
"flat_scratch", AMDGPU::FLAT_SCR)
2621 .
Case(
"xnack_mask", AMDGPU::XNACK_MASK)
2622 .
Case(
"shared_base", AMDGPU::SRC_SHARED_BASE)
2623 .
Case(
"src_shared_base", AMDGPU::SRC_SHARED_BASE)
2624 .
Case(
"shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2625 .
Case(
"src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2626 .
Case(
"private_base", AMDGPU::SRC_PRIVATE_BASE)
2627 .
Case(
"src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2628 .
Case(
"private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2629 .
Case(
"src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2630 .
Case(
"pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2631 .
Case(
"src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2632 .
Case(
"lds_direct", AMDGPU::LDS_DIRECT)
2633 .
Case(
"src_lds_direct", AMDGPU::LDS_DIRECT)
2634 .
Case(
"m0", AMDGPU::M0)
2635 .
Case(
"vccz", AMDGPU::SRC_VCCZ)
2636 .
Case(
"src_vccz", AMDGPU::SRC_VCCZ)
2637 .
Case(
"execz", AMDGPU::SRC_EXECZ)
2638 .
Case(
"src_execz", AMDGPU::SRC_EXECZ)
2639 .
Case(
"scc", AMDGPU::SRC_SCC)
2640 .
Case(
"src_scc", AMDGPU::SRC_SCC)
2641 .
Case(
"tba", AMDGPU::TBA)
2642 .
Case(
"tma", AMDGPU::TMA)
2643 .
Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2644 .
Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2645 .
Case(
"xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2646 .
Case(
"xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2647 .
Case(
"vcc_lo", AMDGPU::VCC_LO)
2648 .
Case(
"vcc_hi", AMDGPU::VCC_HI)
2649 .
Case(
"exec_lo", AMDGPU::EXEC_LO)
2650 .
Case(
"exec_hi", AMDGPU::EXEC_HI)
2651 .
Case(
"tma_lo", AMDGPU::TMA_LO)
2652 .
Case(
"tma_hi", AMDGPU::TMA_HI)
2653 .
Case(
"tba_lo", AMDGPU::TBA_LO)
2654 .
Case(
"tba_hi", AMDGPU::TBA_HI)
2655 .
Case(
"pc", AMDGPU::PC_REG)
2656 .
Case(
"null", AMDGPU::SGPR_NULL)
2660bool AMDGPUAsmParser::ParseRegister(
MCRegister &RegNo,
SMLoc &StartLoc,
2661 SMLoc &EndLoc,
bool RestoreOnFailure) {
2662 auto R = parseRegister();
2663 if (!R)
return true;
2665 RegNo =
R->getReg();
2666 StartLoc =
R->getStartLoc();
2667 EndLoc =
R->getEndLoc();
2673 return ParseRegister(Reg, StartLoc, EndLoc,
false);
2678 bool Result = ParseRegister(Reg, StartLoc, EndLoc,
true);
2679 bool PendingErrors = getParser().hasPendingError();
2680 getParser().clearPendingErrors();
2688bool AMDGPUAsmParser::AddNextRegisterToList(
unsigned &Reg,
unsigned &RegWidth,
2689 RegisterKind RegKind,
unsigned Reg1,
2693 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2698 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2699 Reg = AMDGPU::FLAT_SCR;
2703 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2704 Reg = AMDGPU::XNACK_MASK;
2708 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2713 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2718 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2723 Error(Loc,
"register does not fit in the list");
2729 if (Reg1 != Reg + RegWidth / 32) {
2730 Error(Loc,
"registers in a list must have consecutive indices");
2748 {{
"ttmp"}, IS_TTMP},
2754 return Kind == IS_VGPR ||
2762 if (Str.starts_with(Reg.Name))
2768 return !Str.getAsInteger(10, Num);
2772AMDGPUAsmParser::isRegister(
const AsmToken &Token,
2789 if (!RegSuffix.
empty()) {
2807AMDGPUAsmParser::isRegister()
2809 return isRegister(getToken(), peekToken());
2812unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
unsigned RegNum,
2813 unsigned SubReg,
unsigned RegWidth,
2817 unsigned AlignSize = 1;
2818 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2824 if (RegNum % AlignSize != 0) {
2825 Error(Loc,
"invalid register alignment");
2826 return AMDGPU::NoRegister;
2829 unsigned RegIdx = RegNum / AlignSize;
2832 Error(Loc,
"invalid or unsupported register size");
2833 return AMDGPU::NoRegister;
2839 Error(Loc,
"register index is out of range");
2840 return AMDGPU::NoRegister;
2850 assert(Reg &&
"Invalid subregister!");
2856bool AMDGPUAsmParser::ParseRegRange(
unsigned &Num,
unsigned &RegWidth) {
2857 int64_t RegLo, RegHi;
2861 SMLoc FirstIdxLoc = getLoc();
2864 if (!parseExpr(RegLo))
2868 SecondIdxLoc = getLoc();
2869 if (!parseExpr(RegHi))
2878 if (!isUInt<32>(RegLo)) {
2879 Error(FirstIdxLoc,
"invalid register index");
2883 if (!isUInt<32>(RegHi)) {
2884 Error(SecondIdxLoc,
"invalid register index");
2888 if (RegLo > RegHi) {
2889 Error(FirstIdxLoc,
"first register index should not exceed second index");
2893 Num =
static_cast<unsigned>(RegLo);
2894 RegWidth = 32 * ((RegHi - RegLo) + 1);
2898unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2899 unsigned &RegNum,
unsigned &RegWidth,
2906 RegKind = IS_SPECIAL;
2913unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2914 unsigned &RegNum,
unsigned &RegWidth,
2918 auto Loc = getLoc();
2922 Error(Loc,
"invalid register name");
2923 return AMDGPU::NoRegister;
2931 unsigned SubReg = NoSubRegister;
2932 if (!RegSuffix.
empty()) {
2944 Error(Loc,
"invalid register index");
2945 return AMDGPU::NoRegister;
2950 if (!ParseRegRange(RegNum, RegWidth))
2951 return AMDGPU::NoRegister;
2954 return getRegularReg(RegKind, RegNum,
SubReg, RegWidth, Loc);
2957unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
unsigned &RegNum,
2960 unsigned Reg = AMDGPU::NoRegister;
2961 auto ListLoc = getLoc();
2964 "expected a register or a list of registers")) {
2965 return AMDGPU::NoRegister;
2970 auto Loc = getLoc();
2971 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2972 return AMDGPU::NoRegister;
2973 if (RegWidth != 32) {
2974 Error(Loc,
"expected a single 32-bit register");
2975 return AMDGPU::NoRegister;
2979 RegisterKind NextRegKind;
2980 unsigned NextReg, NextRegNum, NextRegWidth;
2983 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2984 NextRegNum, NextRegWidth,
2986 return AMDGPU::NoRegister;
2988 if (NextRegWidth != 32) {
2989 Error(Loc,
"expected a single 32-bit register");
2990 return AMDGPU::NoRegister;
2992 if (NextRegKind != RegKind) {
2993 Error(Loc,
"registers in a list must be of the same kind");
2994 return AMDGPU::NoRegister;
2996 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2997 return AMDGPU::NoRegister;
3001 "expected a comma or a closing square bracket")) {
3002 return AMDGPU::NoRegister;
3006 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3011bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
3012 unsigned &RegNum,
unsigned &RegWidth,
3014 auto Loc = getLoc();
3015 Reg = AMDGPU::NoRegister;
3018 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3019 if (Reg == AMDGPU::NoRegister)
3020 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3022 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3026 if (Reg == AMDGPU::NoRegister) {
3027 assert(Parser.hasPendingError());
3031 if (!subtargetHasRegister(*
TRI, Reg)) {
3032 if (Reg == AMDGPU::SGPR_NULL) {
3033 Error(Loc,
"'null' operand is not supported on this GPU");
3035 Error(Loc,
"register not available on this GPU");
3043bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
unsigned &Reg,
3044 unsigned &RegNum,
unsigned &RegWidth,
3045 bool RestoreOnFailure ) {
3046 Reg = AMDGPU::NoRegister;
3049 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3050 if (RestoreOnFailure) {
3051 while (!Tokens.
empty()) {
3060std::optional<StringRef>
3061AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3064 return StringRef(
".amdgcn.next_free_vgpr");
3066 return StringRef(
".amdgcn.next_free_sgpr");
3068 return std::nullopt;
3072void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3073 auto SymbolName = getGprCountSymbolName(RegKind);
3074 assert(SymbolName &&
"initializing invalid register kind");
3075 MCSymbol *
Sym = getContext().getOrCreateSymbol(*SymbolName);
3079bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3080 unsigned DwordRegIndex,
3081 unsigned RegWidth) {
3086 auto SymbolName = getGprCountSymbolName(RegKind);
3089 MCSymbol *
Sym = getContext().getOrCreateSymbol(*SymbolName);
3091 int64_t NewMax = DwordRegIndex +
divideCeil(RegWidth, 32) - 1;
3094 if (!
Sym->isVariable())
3095 return !
Error(getLoc(),
3096 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3097 if (!
Sym->getVariableValue(
false)->evaluateAsAbsolute(OldCount))
3100 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3102 if (OldCount <= NewMax)
3108std::unique_ptr<AMDGPUOperand>
3109AMDGPUAsmParser::parseRegister(
bool RestoreOnFailure) {
3110 const auto &Tok = getToken();
3111 SMLoc StartLoc = Tok.getLoc();
3112 SMLoc EndLoc = Tok.getEndLoc();
3113 RegisterKind RegKind;
3114 unsigned Reg, RegNum, RegWidth;
3116 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3120 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3123 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3124 return AMDGPUOperand::CreateReg(
this, Reg, StartLoc, EndLoc);
3128 bool HasSP3AbsModifier,
bool HasLit) {
3136 HasLit = trySkipId(
"lit");
3148 const auto& Tok = getToken();
3149 const auto& NextTok = peekToken();
3152 bool Negate =
false;
3160 AMDGPUOperand::Modifiers Mods;
3171 APFloat RealVal(APFloat::IEEEdouble());
3172 auto roundMode = APFloat::rmNearestTiesToEven;
3173 if (
errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3176 RealVal.changeSign();
3179 AMDGPUOperand::CreateImm(
this, RealVal.bitcastToAPInt().getZExtValue(), S,
3180 AMDGPUOperand::ImmTyNone,
true));
3181 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3182 Op.setModifiers(Mods);
3191 if (HasSP3AbsModifier) {
3200 if (getParser().parsePrimaryExpr(Expr, EndLoc,
nullptr))
3203 if (Parser.parseExpression(Expr))
3207 if (Expr->evaluateAsAbsolute(IntVal)) {
3208 Operands.push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
3209 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3210 Op.setModifiers(Mods);
3214 Operands.push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
3227 if (
auto R = parseRegister()) {
3236 bool HasSP3AbsMod,
bool HasLit) {
3242 return parseImm(
Operands, HasSP3AbsMod, HasLit);
3246AMDGPUAsmParser::isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3249 return str ==
"abs" || str ==
"neg" || str ==
"sext";
3255AMDGPUAsmParser::isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3260AMDGPUAsmParser::isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3261 return isNamedOperandModifier(Token, NextToken) || Token.
is(
AsmToken::Pipe);
3265AMDGPUAsmParser::isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3266 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3283AMDGPUAsmParser::isModifier() {
3287 peekTokens(NextToken);
3289 return isOperandModifier(Tok, NextToken[0]) ||
3290 (Tok.
is(
AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3291 isOpcodeModifierWithVal(Tok, NextToken[0]);
3317AMDGPUAsmParser::parseSP3NegModifier() {
3320 peekTokens(NextToken);
3323 (isRegister(NextToken[0], NextToken[1]) ||
3325 isId(NextToken[0],
"abs"))) {
3343 return Error(getLoc(),
"invalid syntax, expected 'neg' modifier");
3345 SP3Neg = parseSP3NegModifier();
3348 Neg = trySkipId(
"neg");
3350 return Error(Loc,
"expected register or immediate");
3354 Abs = trySkipId(
"abs");
3358 Lit = trySkipId(
"lit");
3365 return Error(Loc,
"expected register or immediate");
3369 Res = parseRegOrImm(
Operands, SP3Abs, Lit);
3376 if (Lit && !
Operands.back()->isImm())
3377 Error(Loc,
"expected immediate with lit modifier");
3379 if (SP3Abs && !skipToken(
AsmToken::Pipe,
"expected vertical bar"))
3388 AMDGPUOperand::Modifiers Mods;
3389 Mods.Abs = Abs || SP3Abs;
3390 Mods.Neg = Neg || SP3Neg;
3393 if (Mods.hasFPModifiers() || Lit) {
3394 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3396 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3397 Op.setModifiers(Mods);
3405 bool Sext = trySkipId(
"sext");
3406 if (Sext && !skipToken(
AsmToken::LParen,
"expected left paren after sext"))
3421 AMDGPUOperand::Modifiers Mods;
3424 if (Mods.hasIntModifiers()) {
3425 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands.back());
3427 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3428 Op.setModifiers(Mods);
3435 return parseRegOrImmWithFPInputMods(
Operands,
false);
3439 return parseRegOrImmWithIntInputMods(
Operands,
false);
3443 auto Loc = getLoc();
3444 if (trySkipId(
"off")) {
3445 Operands.push_back(AMDGPUOperand::CreateImm(
this, 0, Loc,
3446 AMDGPUOperand::ImmTyOff,
false));
3453 std::unique_ptr<AMDGPUOperand>
Reg = parseRegister();
3455 Operands.push_back(std::move(Reg));
3462unsigned AMDGPUAsmParser::checkTargetMatchPredicate(
MCInst &Inst) {
3469 return Match_InvalidOperand;
3471 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3472 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3477 if (!
Op.isImm() ||
Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3478 return Match_InvalidOperand;
3482 return Match_Success;
3486 static const unsigned Variants[] = {
3497 if (isForcedDPP() && isForcedVOP3()) {
3501 if (getForcedEncodingSize() == 32) {
3506 if (isForcedVOP3()) {
3511 if (isForcedSDWA()) {
3517 if (isForcedDPP()) {
3525StringRef AMDGPUAsmParser::getMatchedVariantName()
const {
3526 if (isForcedDPP() && isForcedVOP3())
3529 if (getForcedEncodingSize() == 32)
3544unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(
const MCInst &Inst)
const {
3548 case AMDGPU::FLAT_SCR:
3550 case AMDGPU::VCC_LO:
3551 case AMDGPU::VCC_HI:
3558 return AMDGPU::NoRegister;
3565bool AMDGPUAsmParser::isInlineConstant(
const MCInst &Inst,
3566 unsigned OpIdx)
const {
3576 int64_t Val = MO.
getImm();
3625unsigned AMDGPUAsmParser::getConstantBusLimit(
unsigned Opcode)
const {
3631 case AMDGPU::V_LSHLREV_B64_e64:
3632 case AMDGPU::V_LSHLREV_B64_gfx10:
3633 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3634 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3635 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3636 case AMDGPU::V_LSHRREV_B64_e64:
3637 case AMDGPU::V_LSHRREV_B64_gfx10:
3638 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3639 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3640 case AMDGPU::V_ASHRREV_I64_e64:
3641 case AMDGPU::V_ASHRREV_I64_gfx10:
3642 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3643 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3644 case AMDGPU::V_LSHL_B64_e64:
3645 case AMDGPU::V_LSHR_B64_e64:
3646 case AMDGPU::V_ASHR_I64_e64:
3659 bool AddMandatoryLiterals =
false) {
3665 int16_t ImmDeferredIdx =
3682bool AMDGPUAsmParser::usesConstantBus(
const MCInst &Inst,
unsigned OpIdx) {
3685 return !isInlineConstant(Inst, OpIdx);
3686 }
else if (MO.
isReg()) {
3693 return isSGPR(PReg,
TRI) && PReg != SGPR_NULL;
3705 const unsigned Opcode = Inst.
getOpcode();
3706 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3709 if (!LaneSelOp.
isReg())
3712 return LaneSelReg ==
M0 || LaneSelReg == M0_gfxpre11;
3715bool AMDGPUAsmParser::validateConstantBusLimitations(
3717 const unsigned Opcode = Inst.
getOpcode();
3719 unsigned LastSGPR = AMDGPU::NoRegister;
3720 unsigned ConstantBusUseCount = 0;
3721 unsigned NumLiterals = 0;
3722 unsigned LiteralSize;
3724 if (!(
Desc.TSFlags &
3740 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3741 if (SGPRUsed != AMDGPU::NoRegister) {
3742 SGPRsUsed.
insert(SGPRUsed);
3743 ++ConstantBusUseCount;
3748 for (
int OpIdx : OpIndices) {
3753 if (usesConstantBus(Inst, OpIdx)) {
3762 if (SGPRsUsed.
insert(LastSGPR).second) {
3763 ++ConstantBusUseCount;
3783 if (NumLiterals == 0) {
3786 }
else if (LiteralSize !=
Size) {
3792 ConstantBusUseCount += NumLiterals;
3794 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3800 Error(Loc,
"invalid operand (violates constant bus restrictions)");
3804bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3807 const unsigned Opcode = Inst.
getOpcode();
3813 auto getVRegIdx = [&](
unsigned,
unsigned OperandIdx) {
3821 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3824 auto InvalidCompOprIdx =
3825 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3826 if (!InvalidCompOprIdx)
3829 auto CompOprIdx = *InvalidCompOprIdx;
3831 std::max(InstInfo[
VOPD::X].getIndexInParsedOperands(CompOprIdx),
3832 InstInfo[
VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3835 auto Loc = ((AMDGPUOperand &)*
Operands[ParsedIdx]).getStartLoc();
3836 if (CompOprIdx == VOPD::Component::DST) {
3837 Error(Loc,
"one dst register must be even and the other odd");
3839 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3841 " operands must use different VGPR banks");
3847bool AMDGPUAsmParser::validateIntClampSupported(
const MCInst &Inst) {
3864bool AMDGPUAsmParser::validateMIMGDataSize(
const MCInst &Inst,
3865 const SMLoc &IDLoc) {
3883 unsigned TFESize = (TFEIdx != -1 && Inst.
getOperand(TFEIdx).
getImm()) ? 1 : 0;
3888 bool IsPackedD16 =
false;
3893 IsPackedD16 = D16Idx >= 0;
3895 DataSize = (DataSize + 1) / 2;
3898 if ((VDataSize / 4) == DataSize + TFESize)
3903 Modifiers = IsPackedD16 ?
"dmask and d16" :
"dmask";
3905 Modifiers = IsPackedD16 ?
"dmask, d16 and tfe" :
"dmask and tfe";
3907 Error(IDLoc,
Twine(
"image data size does not match ") + Modifiers);
3911bool AMDGPUAsmParser::validateMIMGAddrSize(
const MCInst &Inst,
3912 const SMLoc &IDLoc) {
3925 : AMDGPU::OpName::rsrc;
3932 assert(SrsrcIdx > VAddr0Idx);
3935 if (BaseOpcode->
BVH) {
3936 if (IsA16 == BaseOpcode->
A16)
3938 Error(IDLoc,
"image address size does not match a16");
3944 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3945 unsigned ActualAddrSize =
3946 IsNSA ? SrsrcIdx - VAddr0Idx
3949 unsigned ExpectedAddrSize =
3953 if (hasPartialNSAEncoding() &&
3956 int VAddrLastIdx = SrsrcIdx - 1;
3957 unsigned VAddrLastSize =
3960 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3963 if (ExpectedAddrSize > 12)
3964 ExpectedAddrSize = 16;
3969 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3973 if (ActualAddrSize == ExpectedAddrSize)
3976 Error(IDLoc,
"image address size does not match dim and a16");
3980bool AMDGPUAsmParser::validateMIMGAtomicDMask(
const MCInst &Inst) {
3987 if (!
Desc.mayLoad() || !
Desc.mayStore())
3997 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4000bool AMDGPUAsmParser::validateMIMGGatherDMask(
const MCInst &Inst) {
4016 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4019bool AMDGPUAsmParser::validateMIMGMSAA(
const MCInst &Inst) {
4030 if (!BaseOpcode->
MSAA)
4039 return DimInfo->
MSAA;
4045 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4046 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4047 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4057bool AMDGPUAsmParser::validateMovrels(
const MCInst &Inst,
4081 Error(ErrLoc,
"source operand must be a VGPR");
4085bool AMDGPUAsmParser::validateMAIAccWrite(
const MCInst &Inst,
4090 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4104 "source operand must be either a VGPR or an inline constant");
4111bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
4117 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4124 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
4126 "inline constants are not allowed for this operand");
4133bool AMDGPUAsmParser::validateMFMA(
const MCInst &Inst,
4151 if (Src2Reg == DstReg)
4155 if (
TRI->getRegClass(
Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4158 if (
TRI->regsOverlap(Src2Reg, DstReg)) {
4160 "source 2 operand must not partially overlap with dst");
4167bool AMDGPUAsmParser::validateDivScale(
const MCInst &Inst) {
4171 case V_DIV_SCALE_F32_gfx6_gfx7:
4172 case V_DIV_SCALE_F32_vi:
4173 case V_DIV_SCALE_F32_gfx10:
4174 case V_DIV_SCALE_F64_gfx6_gfx7:
4175 case V_DIV_SCALE_F64_vi:
4176 case V_DIV_SCALE_F64_gfx10:
4182 for (
auto Name : {AMDGPU::OpName::src0_modifiers,
4183 AMDGPU::OpName::src2_modifiers,
4184 AMDGPU::OpName::src2_modifiers}) {
4195bool AMDGPUAsmParser::validateMIMGD16(
const MCInst &Inst) {
4215 case AMDGPU::V_SUBREV_F32_e32:
4216 case AMDGPU::V_SUBREV_F32_e64:
4217 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4218 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4219 case AMDGPU::V_SUBREV_F32_e32_vi:
4220 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4221 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4222 case AMDGPU::V_SUBREV_F32_e64_vi:
4224 case AMDGPU::V_SUBREV_CO_U32_e32:
4225 case AMDGPU::V_SUBREV_CO_U32_e64:
4226 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4227 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4229 case AMDGPU::V_SUBBREV_U32_e32:
4230 case AMDGPU::V_SUBBREV_U32_e64:
4231 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4232 case AMDGPU::V_SUBBREV_U32_e32_vi:
4233 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4234 case AMDGPU::V_SUBBREV_U32_e64_vi:
4236 case AMDGPU::V_SUBREV_U32_e32:
4237 case AMDGPU::V_SUBREV_U32_e64:
4238 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4239 case AMDGPU::V_SUBREV_U32_e32_vi:
4240 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4241 case AMDGPU::V_SUBREV_U32_e64_vi:
4243 case AMDGPU::V_SUBREV_F16_e32:
4244 case AMDGPU::V_SUBREV_F16_e64:
4245 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4246 case AMDGPU::V_SUBREV_F16_e32_vi:
4247 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4248 case AMDGPU::V_SUBREV_F16_e64_vi:
4250 case AMDGPU::V_SUBREV_U16_e32:
4251 case AMDGPU::V_SUBREV_U16_e64:
4252 case AMDGPU::V_SUBREV_U16_e32_vi:
4253 case AMDGPU::V_SUBREV_U16_e64_vi:
4255 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4256 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4257 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4259 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4260 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4262 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4263 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4265 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4266 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4268 case AMDGPU::V_LSHRREV_B32_e32:
4269 case AMDGPU::V_LSHRREV_B32_e64:
4270 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4271 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4272 case AMDGPU::V_LSHRREV_B32_e32_vi:
4273 case AMDGPU::V_LSHRREV_B32_e64_vi:
4274 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4275 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4277 case AMDGPU::V_ASHRREV_I32_e32:
4278 case AMDGPU::V_ASHRREV_I32_e64:
4279 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4280 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4281 case AMDGPU::V_ASHRREV_I32_e32_vi:
4282 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4283 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4284 case AMDGPU::V_ASHRREV_I32_e64_vi:
4286 case AMDGPU::V_LSHLREV_B32_e32:
4287 case AMDGPU::V_LSHLREV_B32_e64:
4288 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4289 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4290 case AMDGPU::V_LSHLREV_B32_e32_vi:
4291 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4292 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4293 case AMDGPU::V_LSHLREV_B32_e64_vi:
4295 case AMDGPU::V_LSHLREV_B16_e32:
4296 case AMDGPU::V_LSHLREV_B16_e64:
4297 case AMDGPU::V_LSHLREV_B16_e32_vi:
4298 case AMDGPU::V_LSHLREV_B16_e64_vi:
4299 case AMDGPU::V_LSHLREV_B16_gfx10:
4301 case AMDGPU::V_LSHRREV_B16_e32:
4302 case AMDGPU::V_LSHRREV_B16_e64:
4303 case AMDGPU::V_LSHRREV_B16_e32_vi:
4304 case AMDGPU::V_LSHRREV_B16_e64_vi:
4305 case AMDGPU::V_LSHRREV_B16_gfx10:
4307 case AMDGPU::V_ASHRREV_I16_e32:
4308 case AMDGPU::V_ASHRREV_I16_e64:
4309 case AMDGPU::V_ASHRREV_I16_e32_vi:
4310 case AMDGPU::V_ASHRREV_I16_e64_vi:
4311 case AMDGPU::V_ASHRREV_I16_gfx10:
4313 case AMDGPU::V_LSHLREV_B64_e64:
4314 case AMDGPU::V_LSHLREV_B64_gfx10:
4315 case AMDGPU::V_LSHLREV_B64_vi:
4317 case AMDGPU::V_LSHRREV_B64_e64:
4318 case AMDGPU::V_LSHRREV_B64_gfx10:
4319 case AMDGPU::V_LSHRREV_B64_vi:
4321 case AMDGPU::V_ASHRREV_I64_e64:
4322 case AMDGPU::V_ASHRREV_I64_gfx10:
4323 case AMDGPU::V_ASHRREV_I64_vi:
4325 case AMDGPU::V_PK_LSHLREV_B16:
4326 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4327 case AMDGPU::V_PK_LSHLREV_B16_vi:
4329 case AMDGPU::V_PK_LSHRREV_B16:
4330 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4331 case AMDGPU::V_PK_LSHRREV_B16_vi:
4332 case AMDGPU::V_PK_ASHRREV_I16:
4333 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4334 case AMDGPU::V_PK_ASHRREV_I16_vi:
4341std::optional<StringRef>
4342AMDGPUAsmParser::validateLdsDirect(
const MCInst &Inst) {
4344 using namespace SIInstrFlags;
4345 const unsigned Opcode = Inst.
getOpcode();
4351 if ((
Desc.TSFlags & Enc) == 0)
4352 return std::nullopt;
4354 for (
auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4359 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4362 return StringRef(
"lds_direct is not supported on this GPU");
4365 return StringRef(
"lds_direct cannot be used with this instruction");
4367 if (SrcName != OpName::src0)
4368 return StringRef(
"lds_direct may be used as src0 only");
4372 return std::nullopt;
4376 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4377 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4378 if (
Op.isFlatOffset())
4379 return Op.getStartLoc();
4384bool AMDGPUAsmParser::validateOffset(
const MCInst &Inst,
4393 return validateFlatOffset(Inst,
Operands);
4396 return validateSMEMOffset(Inst,
Operands);
4401 const unsigned OffsetSize = 24;
4402 if (!
isIntN(OffsetSize,
Op.getImm())) {
4404 Twine(
"expected a ") +
Twine(OffsetSize) +
"-bit signed offset");
4408 const unsigned OffsetSize = 16;
4409 if (!
isUIntN(OffsetSize,
Op.getImm())) {
4411 Twine(
"expected a ") +
Twine(OffsetSize) +
"-bit unsigned offset");
4418bool AMDGPUAsmParser::validateFlatOffset(
const MCInst &Inst,
4429 if (!hasFlatOffsets() &&
Op.getImm() != 0) {
4431 "flat offset modifier is not supported on this GPU");
4438 bool AllowNegative =
4441 if (!
isIntN(OffsetSize,
Op.getImm()) || (!AllowNegative &&
Op.getImm() < 0)) {
4443 Twine(
"expected a ") +
4444 (AllowNegative ?
Twine(OffsetSize) +
"-bit signed offset"
4445 :
Twine(OffsetSize - 1) +
"-bit unsigned offset"));
4454 for (
unsigned i = 2, e =
Operands.size(); i != e; ++i) {
4455 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4456 if (
Op.isSMEMOffset() ||
Op.isSMEMOffsetMod())
4457 return Op.getStartLoc();
4462bool AMDGPUAsmParser::validateSMEMOffset(
const MCInst &Inst,
4488 : (
isVI() || IsBuffer) ?
"expected a 20-bit unsigned offset"
4489 :
"expected a 21-bit signed offset");
// Validates that a SOP instruction uses at most one unique 32-bit literal
// across its src0/src1 operands (hardware allows only a single literal slot).
// NOTE(review): this view of the file elides interior lines; comments below
// describe only the visible fragment.
4494bool AMDGPUAsmParser::validateSOPLiteral(
const MCInst &Inst)
const {
4503 const int OpIndices[] = { Src0Idx, Src1Idx };
4505 unsigned NumExprs = 0;
4506 unsigned NumLiterals = 0;
// Walk the source operands in order; -1 marks a missing operand.
4509 for (
int OpIdx : OpIndices) {
4510 if (OpIdx == -1)
break;
// An immediate that is not an inline constant consumes the literal slot.
4515 if (MO.
isImm() && !isInlineConstant(Inst, OpIdx)) {
// Count only distinct literal values; repeating the same value is fine.
4517 if (NumLiterals == 0 || LiteralValue !=
Value) {
4521 }
else if (MO.
isExpr()) {
// At most one literal (or unresolved expression) may be used in total.
4527 return NumLiterals + NumExprs <= 1;
4530bool AMDGPUAsmParser::validateOpSel(
const MCInst &Inst) {
4544 if (OpSelIdx != -1) {
4549 if (OpSelHiIdx != -1) {
4567bool AMDGPUAsmParser::validateNeg(
const MCInst &Inst,
int OpName) {
4592 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4593 AMDGPU::OpName::src1_modifiers,
4594 AMDGPU::OpName::src2_modifiers};
4596 for (
unsigned i = 0; i < 3; ++i) {
4606bool AMDGPUAsmParser::validateDPP(
const MCInst &Inst,
4610 if (DppCtrlIdx >= 0) {
4617 Error(S,
"DP ALU dpp only supports row_newbcast");
4623 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4632 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[Src1Idx]);
4633 Error(
Op.getStartLoc(),
"invalid operand for instruction");
// Returns true iff Reg names the VCC register form that matches the
// subtarget's wavefront size: the full 64-bit VCC pair under wave64, or
// VCC_LO under wave32.
4643bool AMDGPUAsmParser::validateVccOperand(
unsigned Reg)
const {
4644 auto FB = getFeatureBits();
4645 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4646 (FB[AMDGPU::FeatureWavefrontSize32] &&
Reg == AMDGPU::VCC_LO);
4650bool AMDGPUAsmParser::validateVOPLiteral(
const MCInst &Inst,
4656 !HasMandatoryLiteral && !
isVOPD(Opcode))
4661 unsigned NumExprs = 0;
4662 unsigned NumLiterals = 0;
4665 for (
int OpIdx : OpIndices) {
4675 if (MO.
isImm() && !isInlineConstant(Inst, OpIdx)) {
4681 if (!IsValid32Op && !isInt<32>(
Value) && !isUInt<32>(
Value)) {
4682 Error(getLitLoc(
Operands),
"invalid operand for instruction");
4686 if (IsFP64 && IsValid32Op)
4689 if (NumLiterals == 0 || LiteralValue !=
Value) {
4693 }
else if (MO.
isExpr()) {
4697 NumLiterals += NumExprs;
4702 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4703 Error(getLitLoc(
Operands),
"literal operands are not supported");
4707 if (NumLiterals > 1) {
4708 Error(getLitLoc(
Operands,
true),
"only one unique literal operand is allowed");
4726 unsigned Sub =
MRI->getSubReg(
Op.getReg(), AMDGPU::sub0);
4727 auto Reg = Sub ? Sub :
Op.getReg();
4729 return AGPR32.
contains(Reg) ? 1 : 0;
// Validates AGPR vs. VGPR register-class consistency for load/store style
// instructions: dst and data operands must agree on whether they are AGPRs.
// NOTE(review): interior lines are elided in this view; comments describe
// only the visible fragment.
4732bool AMDGPUAsmParser::validateAGPRLdSt(
const MCInst &Inst)
const {
4740 : AMDGPU::OpName::vdata;
// If a second data operand exists, it must match the first one's class.
4748 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4752 auto FB = getFeatureBits();
4753 if (FB[AMDGPU::FeatureGFX90AInsts]) {
// On gfx90a+, dst and data must be uniformly VGPR or uniformly AGPR.
4754 if (DataAreg < 0 || DstAreg < 0)
4756 return DstAreg == DataAreg;
// Pre-gfx90a targets do not support AGPR loads/stores at all.
4759 return DstAreg < 1 && DataAreg < 1;
// On gfx90a+ targets, VGPR/AGPR register tuples must start on an even
// register; checks the sub0 component of each tuple operand for alignment.
// NOTE(review): interior lines are elided in this view; comments describe
// only the visible fragment.
4762bool AMDGPUAsmParser::validateVGPRAlign(
const MCInst &Inst)
const {
4763 auto FB = getFeatureBits();
// The alignment restriction only exists on GFX90A-class hardware.
4764 if (!FB[AMDGPU::FeatureGFX90AInsts])
// Sub is the first 32-bit component of a multi-register tuple (0 if none).
4775 unsigned Sub =
MRI->getSubReg(
Op.getReg(), AMDGPU::sub0);
// Odd-numbered starting VGPR/AGPR in a tuple is invalid on this target.
4779 if (VGPR32.
contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4781 if (AGPR32.
contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4789 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
4790 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
4792 return Op.getStartLoc();
4797bool AMDGPUAsmParser::validateBLGP(
const MCInst &Inst,
4807 auto FB = getFeatureBits();
4808 bool UsesNeg =
false;
4809 if (FB[AMDGPU::FeatureGFX940Insts]) {
4811 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4812 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4813 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4814 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4819 if (IsNeg == UsesNeg)
4823 UsesNeg ?
"invalid modifier: blgp is not supported"
4824 :
"invalid modifier: neg is not supported");
// For the gfx11 split s_waitcnt forms (expcnt/lgkmcnt/vmcnt/vscnt), the
// src0 register operand must be the null register; emits "src0 must be
// null" otherwise. Other opcodes are accepted without checks.
// NOTE(review): interior lines are elided in this view.
4829bool AMDGPUAsmParser::validateWaitCnt(
const MCInst &Inst,
// Only these four gfx11 opcodes carry the null-src0 requirement.
4835 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4836 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4837 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4838 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4844 if (Reg == AMDGPU::SGPR_NULL)
4848 Error(RegLoc,
"src0 must be null");
4852bool AMDGPUAsmParser::validateDS(
const MCInst &Inst,
4858 return validateGWS(Inst,
Operands);
4869 Error(S,
"gds modifier is not supported on this GPU");
// GFX90A requires the data VGPR of certain DS GWS instructions
// (ds_gws_init / ds_gws_barrier / ds_gws_sema_br) to be even-aligned;
// emits "vgpr must be even aligned" when it is not.
// NOTE(review): interior lines are elided in this view.
4877bool AMDGPUAsmParser::validateGWS(
const MCInst &Inst,
// The restriction applies only to GFX90A-class subtargets.
4879 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4883 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4884 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
// Normalize the register to a 0-based index within its VGPR/AGPR bank.
4893 auto RegIdx =
Reg - (VGPR32.
contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4896 Error(RegLoc,
"vgpr must be even aligned");
4903bool AMDGPUAsmParser::validateCoherencyBits(
const MCInst &Inst,
4905 const SMLoc &IDLoc) {
4907 AMDGPU::OpName::cpol);
4914 return validateTHAndScopeBits(Inst,
Operands, CPol);
4920 Error(S,
"cache policy is not supported for SMRD instructions");
4924 Error(IDLoc,
"invalid cache policy for SMEM instruction");
4933 if (!(TSFlags & AllowSCCModifier)) {
4938 "scc modifier is not supported for this instruction on this GPU");
4949 :
"instruction must use glc");
4957 &CStr.data()[CStr.find(
isGFX940() ?
"sc0" :
"glc")]);
4959 :
"instruction must not use glc");
4967bool AMDGPUAsmParser::validateTHAndScopeBits(
const MCInst &Inst,
4969 const unsigned CPol) {
4973 const unsigned Opcode = Inst.
getOpcode();
4985 return PrintError(
"instruction must use th:TH_ATOMIC_RETURN");
4993 return PrintError(
"invalid th value for SMEM instruction");
5000 return PrintError(
"scope and th combination is not valid");
5009 return PrintError(
"invalid th value for atomic instructions");
5010 }
else if (IsStore) {
5012 return PrintError(
"invalid th value for store instructions");
5015 return PrintError(
"invalid th value for load instructions");
5025 if (!Operand->isReg())
5027 unsigned Reg = Operand->getReg();
5028 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5030 "execz and vccz are not supported on this GPU");
// Rejects the TFE (texture-fail-enable) modifier on store instructions,
// where it has no effect; emits a diagnostic at the modifier's location.
// NOTE(review): interior lines are elided in this view.
5037bool AMDGPUAsmParser::validateTFE(
const MCInst &Inst,
5040 if (
Desc.mayStore() &&
5044 Error(Loc,
"TFE modifier has no meaning for store instructions");
5052bool AMDGPUAsmParser::validateInstruction(
const MCInst &Inst,
5055 if (
auto ErrMsg = validateLdsDirect(Inst)) {
5059 if (!validateSOPLiteral(Inst)) {
5061 "only one unique literal operand is allowed");
5064 if (!validateVOPLiteral(Inst,
Operands)) {
5067 if (!validateConstantBusLimitations(Inst,
Operands)) {
5070 if (!validateVOPDRegBankConstraints(Inst,
Operands)) {
5073 if (!validateIntClampSupported(Inst)) {
5075 "integer clamping is not supported on this GPU");
5078 if (!validateOpSel(Inst)) {
5080 "invalid op_sel operand");
5083 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5085 "invalid neg_lo operand");
5088 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5090 "invalid neg_hi operand");
5093 if (!validateDPP(Inst,
Operands)) {
5097 if (!validateMIMGD16(Inst)) {
5099 "d16 modifier is not supported on this GPU");
5102 if (!validateMIMGMSAA(Inst)) {
5104 "invalid dim; must be MSAA type");
5107 if (!validateMIMGDataSize(Inst, IDLoc)) {
5110 if (!validateMIMGAddrSize(Inst, IDLoc))
5112 if (!validateMIMGAtomicDMask(Inst)) {
5114 "invalid atomic image dmask");
5117 if (!validateMIMGGatherDMask(Inst)) {
5119 "invalid image_gather dmask: only one bit must be set");
5122 if (!validateMovrels(Inst,
Operands)) {
5125 if (!validateOffset(Inst,
Operands)) {
5128 if (!validateMAIAccWrite(Inst,
Operands)) {
5131 if (!validateMAISrc2(Inst,
Operands)) {
5134 if (!validateMFMA(Inst,
Operands)) {
5137 if (!validateCoherencyBits(Inst,
Operands, IDLoc)) {
5141 if (!validateAGPRLdSt(Inst)) {
5142 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5143 ?
"invalid register class: data and dst should be all VGPR or AGPR"
5144 :
"invalid register class: agpr loads and stores not supported on this GPU"
5148 if (!validateVGPRAlign(Inst)) {
5150 "invalid register class: vgpr tuples must be 64 bit aligned");
5157 if (!validateBLGP(Inst,
Operands)) {
5161 if (!validateDivScale(Inst)) {
5162 Error(IDLoc,
"ABS not allowed in VOP3B instructions");
5165 if (!validateWaitCnt(Inst,
Operands)) {
5168 if (!validateExeczVcczOperands(
Operands)) {
5171 if (!validateTFE(Inst,
Operands)) {
5180 unsigned VariantID = 0);
5184 unsigned VariantID);
5186bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
5191bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
5194 for (
auto Variant : Variants) {
// Produces the best possible diagnostic for a mnemonic that failed to
// match: distinguishes "wrong syntax variant", "requires wavesize=32",
// "not supported on this GPU", and plain "invalid instruction".
// NOTE(review): interior lines are elided in this view; comments describe
// only the visible fragment.
5202bool AMDGPUAsmParser::checkUnsupportedInstruction(
StringRef Mnemo,
5203 const SMLoc &IDLoc) {
5204 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
// If the mnemonic is valid under some matched variant, this is not an
// "unsupported instruction" situation; let the caller report normally.
5207 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
// Discard stale match errors before emitting a more specific message.
5212 getParser().clearPendingErrors();
// The mnemonic exists but only under a different syntax variant.
5216 StringRef VariantName = getMatchedVariantName();
5217 if (!VariantName.
empty() && isSupportedMnemo(Mnemo, FBS)) {
5220 " variant of this instruction is not supported"));
// gfx10+ wave64-only configuration: retry the match pretending wave32 to
// detect instructions that exist only in wave32 mode.
5224 if (
isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5225 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5228 FeaturesWS32.
flip(AMDGPU::FeatureWavefrontSize64)
5229 .
flip(AMDGPU::FeatureWavefrontSize32);
5231 ComputeAvailableFeatures(FeaturesWS32);
5233 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5234 return Error(IDLoc,
"instruction requires wavesize=32");
// Known mnemonic, but not available on this subtarget.
5239 return Error(IDLoc,
"instruction not supported on this GPU");
// Fallback: unknown mnemonic (Suggestion may carry a "did you mean" hint
// — elided lines; TODO confirm).
5244 return Error(IDLoc,
"invalid instruction" + Suggestion);
5250 const auto &
Op = ((AMDGPUOperand &)*
Operands[InvalidOprIdx]);
5251 if (
Op.isToken() && InvalidOprIdx > 1) {
5252 const auto &PrevOp = ((AMDGPUOperand &)*
Operands[InvalidOprIdx - 1]);
5253 return PrevOp.isToken() && PrevOp.getToken() ==
"::";
5258bool AMDGPUAsmParser::MatchAndEmitInstruction(
SMLoc IDLoc,
unsigned &Opcode,
5262 bool MatchingInlineAsm) {
5264 unsigned Result = Match_Success;
5265 for (
auto Variant : getMatchedVariants()) {
5267 auto R = MatchInstructionImpl(
Operands, Inst, EI, MatchingInlineAsm,
5272 if ((R == Match_Success) ||
5273 (R == Match_PreferE32) ||
5274 (R == Match_MissingFeature && Result != Match_PreferE32) ||
5275 (R == Match_InvalidOperand && Result != Match_MissingFeature
5276 && Result != Match_PreferE32) ||
5277 (R == Match_MnemonicFail && Result != Match_InvalidOperand
5278 && Result != Match_MissingFeature
5279 && Result != Match_PreferE32)) {
5283 if (R == Match_Success)
5287 if (Result == Match_Success) {
5288 if (!validateInstruction(Inst, IDLoc,
Operands)) {
5297 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5303 case Match_MissingFeature:
5307 return Error(IDLoc,
"operands are not valid for this GPU or mode");
5309 case Match_InvalidOperand: {
5310 SMLoc ErrorLoc = IDLoc;
5313 return Error(IDLoc,
"too few operands for instruction");
5316 if (ErrorLoc ==
SMLoc())
5320 return Error(ErrorLoc,
"invalid VOPDY instruction");
5322 return Error(ErrorLoc,
"invalid operand for instruction");
5325 case Match_PreferE32:
5326 return Error(IDLoc,
"internal error: instruction without _e64 suffix "
5327 "should be encoded as e32");
5328 case Match_MnemonicFail:
5334bool AMDGPUAsmParser::ParseAsAbsoluteExpression(
uint32_t &Ret) {
5339 if (getParser().parseAbsoluteExpression(Tmp)) {
// Parses the .amdgcn_target directive: reads the quoted target-id string
// and verifies it matches the target id the streamer was configured with.
// Returns true (error) on mismatch or parse failure.
// NOTE(review): interior lines are elided in this view.
5346bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
// Directive is only meaningful for the amdgcn architecture.
5348 return TokError(
"directive only supported for amdgcn architecture");
5350 std::string TargetIDDirective;
5351 SMLoc TargetStart = getTok().getLoc();
5352 if (getParser().parseEscapedString(TargetIDDirective))
// The directive's target id must match the one selected via options.
5356 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
5357 return getParser().Error(TargetRange.
Start,
5358 (
Twine(
".amdgcn_target directive's target id ") +
5359 Twine(TargetIDDirective) +
5360 Twine(
" does not match the specified target id ") +
5361 Twine(getTargetStreamer().getTargetID()->
toString())).str());
// Emits a generic "value out of range" diagnostic anchored at Range.Start
// (with the full range attached) and returns the result of Error(),
// signalling failure to the caller.
5366bool AMDGPUAsmParser::OutOfRangeError(
SMRange Range) {
5367 return Error(
Range.Start,
"value out of range", Range);
5370bool AMDGPUAsmParser::calculateGPRBlocks(
5371 const FeatureBitset &Features,
bool VCCUsed,
bool FlatScrUsed,
5372 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5373 unsigned NextFreeVGPR,
SMRange VGPRRange,
unsigned NextFreeSGPR,
5374 SMRange SGPRRange,
unsigned &VGPRBlocks,
unsigned &SGPRBlocks) {
5385 unsigned MaxAddressableNumSGPRs =
5388 if (
Version.Major >= 8 && !Features.
test(FeatureSGPRInitBug) &&
5389 NumSGPRs > MaxAddressableNumSGPRs)
5390 return OutOfRangeError(SGPRRange);
5395 if ((
Version.Major <= 7 || Features.
test(FeatureSGPRInitBug)) &&
5396 NumSGPRs > MaxAddressableNumSGPRs)
5397 return OutOfRangeError(SGPRRange);
5399 if (Features.
test(FeatureSGPRInitBug))
5404 EnableWavefrontSize32);
5410bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5412 return TokError(
"directive only supported for amdgcn architecture");
5415 return TokError(
"directive only supported for amdhsa OS");
5418 if (getParser().parseIdentifier(KernelName))
5423 &getSTI(), getContext());
5439 unsigned ImpliedUserSGPRCount = 0;
5443 std::optional<unsigned> ExplicitUserSGPRCount;
5444 bool ReserveVCC =
true;
5445 bool ReserveFlatScr =
true;
5446 std::optional<bool> EnableWavefrontSize32;
5452 SMRange IDRange = getTok().getLocRange();
5453 if (!parseId(
ID,
"expected .amdhsa_ directive or .end_amdhsa_kernel"))
5456 if (
ID ==
".end_amdhsa_kernel")
5460 return TokError(
".amdhsa_ directives cannot be repeated");
5462 SMLoc ValStart = getLoc();
5464 if (getParser().parseExpression(ExprVal))
5466 SMLoc ValEnd = getLoc();
5471 bool EvaluatableExpr;
5472 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5474 return OutOfRangeError(ValRange);
5478#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5479 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5480 return OutOfRangeError(RANGE); \
5481 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5486#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5488 return Error(IDRange.Start, "directive should have resolvable expression", \
5491 if (
ID ==
".amdhsa_group_segment_fixed_size") {
5494 return OutOfRangeError(ValRange);
5496 }
else if (
ID ==
".amdhsa_private_segment_fixed_size") {
5499 return OutOfRangeError(ValRange);
5501 }
else if (
ID ==
".amdhsa_kernarg_size") {
5503 return OutOfRangeError(ValRange);
5505 }
else if (
ID ==
".amdhsa_user_sgpr_count") {
5507 ExplicitUserSGPRCount = Val;
5508 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
5512 "directive is not supported with architected flat scratch",
5515 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5518 ImpliedUserSGPRCount += 4;
5519 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
5522 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5525 return OutOfRangeError(ValRange);
5529 ImpliedUserSGPRCount += Val;
5530 PreloadLength = Val;
5532 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
5535 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5538 return OutOfRangeError(ValRange);
5542 PreloadOffset = Val;
5543 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
5546 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5549 ImpliedUserSGPRCount += 2;
5550 }
else if (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
5553 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5556 ImpliedUserSGPRCount += 2;
5557 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
5560 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5563 ImpliedUserSGPRCount += 2;
5564 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
5567 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5570 ImpliedUserSGPRCount += 2;
5571 }
else if (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
5574 "directive is not supported with architected flat scratch",
5578 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5581 ImpliedUserSGPRCount += 2;
5582 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
5585 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5588 ImpliedUserSGPRCount += 1;
5589 }
else if (
ID ==
".amdhsa_wavefront_size32") {
5591 if (IVersion.
Major < 10)
5592 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5593 EnableWavefrontSize32 = Val;
5595 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5597 }
else if (
ID ==
".amdhsa_uses_dynamic_stack") {
5599 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5601 }
else if (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5604 "directive is not supported with architected flat scratch",
5607 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5609 }
else if (
ID ==
".amdhsa_enable_private_segment") {
5613 "directive is not supported without architected flat scratch",
5616 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5618 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
5620 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5622 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
5624 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5626 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
5628 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5630 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
5632 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5634 }
else if (
ID ==
".amdhsa_system_vgpr_workitem_id") {
5636 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5638 }
else if (
ID ==
".amdhsa_next_free_vgpr") {
5640 VGPRRange = ValRange;
5642 }
else if (
ID ==
".amdhsa_next_free_sgpr") {
5644 SGPRRange = ValRange;
5646 }
else if (
ID ==
".amdhsa_accum_offset") {
5648 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5651 }
else if (
ID ==
".amdhsa_reserve_vcc") {
5653 if (!isUInt<1>(Val))
5654 return OutOfRangeError(ValRange);
5656 }
else if (
ID ==
".amdhsa_reserve_flat_scratch") {
5658 if (IVersion.
Major < 7)
5659 return Error(IDRange.
Start,
"directive requires gfx7+", IDRange);
5662 "directive is not supported with architected flat scratch",
5664 if (!isUInt<1>(Val))
5665 return OutOfRangeError(ValRange);
5666 ReserveFlatScr = Val;
5667 }
else if (
ID ==
".amdhsa_reserve_xnack_mask") {
5668 if (IVersion.
Major < 8)
5669 return Error(IDRange.
Start,
"directive requires gfx8+", IDRange);
5670 if (!isUInt<1>(Val))
5671 return OutOfRangeError(ValRange);
5672 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5673 return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
5675 }
else if (
ID ==
".amdhsa_float_round_mode_32") {
5677 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5679 }
else if (
ID ==
".amdhsa_float_round_mode_16_64") {
5681 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5683 }
else if (
ID ==
".amdhsa_float_denorm_mode_32") {
5685 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5687 }
else if (
ID ==
".amdhsa_float_denorm_mode_16_64") {
5689 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5691 }
else if (
ID ==
".amdhsa_dx10_clamp") {
5692 if (IVersion.
Major >= 12)
5693 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
5695 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5697 }
else if (
ID ==
".amdhsa_ieee_mode") {
5698 if (IVersion.
Major >= 12)
5699 return Error(IDRange.
Start,
"directive unsupported on gfx12+", IDRange);
5701 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5703 }
else if (
ID ==
".amdhsa_fp16_overflow") {
5704 if (IVersion.
Major < 9)
5705 return Error(IDRange.
Start,
"directive requires gfx9+", IDRange);
5707 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5709 }
else if (
ID ==
".amdhsa_tg_split") {
5711 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
5714 }
else if (
ID ==
".amdhsa_workgroup_processor_mode") {
5715 if (IVersion.
Major < 10)
5716 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5718 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5720 }
else if (
ID ==
".amdhsa_memory_ordered") {
5721 if (IVersion.
Major < 10)
5722 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5724 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5726 }
else if (
ID ==
".amdhsa_forward_progress") {
5727 if (IVersion.
Major < 10)
5728 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
5730 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5732 }
else if (
ID ==
".amdhsa_shared_vgpr_count") {
5734 if (IVersion.
Major < 10 || IVersion.
Major >= 12)
5735 return Error(IDRange.
Start,
"directive requires gfx10 or gfx11",
5737 SharedVGPRCount = Val;
5739 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5741 }
else if (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
5744 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5746 }
else if (
ID ==
".amdhsa_exception_fp_denorm_src") {
5748 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5750 }
else if (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
5753 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5755 }
else if (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
5757 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5759 }
else if (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
5761 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5763 }
else if (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
5765 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5767 }
else if (
ID ==
".amdhsa_exception_int_div_zero") {
5769 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5771 }
else if (
ID ==
".amdhsa_round_robin_scheduling") {
5772 if (IVersion.
Major < 12)
5773 return Error(IDRange.
Start,
"directive requires gfx12+", IDRange);
5775 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5778 return Error(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
5781#undef PARSE_BITS_ENTRY
5784 if (!Seen.
contains(
".amdhsa_next_free_vgpr"))
5785 return TokError(
".amdhsa_next_free_vgpr directive is required");
5787 if (!Seen.
contains(
".amdhsa_next_free_sgpr"))
5788 return TokError(
".amdhsa_next_free_sgpr directive is required");
5790 unsigned VGPRBlocks;
5791 unsigned SGPRBlocks;
5792 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5793 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5794 EnableWavefrontSize32, NextFreeVGPR,
5795 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5799 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5801 return OutOfRangeError(VGPRRange);
5804 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5805 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5807 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5809 return OutOfRangeError(SGPRRange);
5812 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5813 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5815 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5816 return TokError(
"amdgpu_user_sgpr_count smaller than than implied by "
5817 "enabled user SGPRs");
5819 unsigned UserSGPRCount =
5820 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5822 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5823 return TokError(
"too many user SGPRs enabled");
5826 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5827 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5831 return TokError(
"Kernarg size should be resolvable");
5833 if (PreloadLength && kernarg_size &&
5834 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5835 return TokError(
"Kernarg preload length + offset is larger than the "
5836 "kernarg segment size");
5839 if (!Seen.
contains(
".amdhsa_accum_offset"))
5840 return TokError(
".amdhsa_accum_offset directive is required");
5841 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5842 return TokError(
"accum_offset should be in range [4..256] in "
5845 return TokError(
"accum_offset exceeds total VGPR allocation");
5849 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5850 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
5853 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
5855 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5856 return TokError(
"shared_vgpr_count directive not valid on "
5857 "wavefront size 32");
5859 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5860 return TokError(
"shared_vgpr_count*2 + "
5861 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5866 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5867 NextFreeVGPR, NextFreeSGPR,
5868 ReserveVCC, ReserveFlatScr);
// Parses the HSA code-object-version directive: reads one absolute
// integer expression and forwards it to the target streamer.
// NOTE(review): interior lines are elided in this view.
5872bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5874 if (ParseAsAbsoluteExpression(Version))
5877 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5881bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(
StringRef ID,
5885 if (
ID ==
"max_scratch_backing_memory_byte_size") {
5886 Parser.eatToEndOfStatement();
5893 return TokError(Err.str());
5897 if (
ID ==
"enable_dx10_clamp") {
5900 return TokError(
"enable_dx10_clamp=1 is not allowed on GFX12+");
5903 if (
ID ==
"enable_ieee_mode") {
5906 return TokError(
"enable_ieee_mode=1 is not allowed on GFX12+");
5909 if (
ID ==
"enable_wavefront_size32") {
5912 return TokError(
"enable_wavefront_size32=1 is only allowed on GFX10+");
5913 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5914 return TokError(
"enable_wavefront_size32=1 requires +WavefrontSize32");
5916 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5917 return TokError(
"enable_wavefront_size32=0 requires +WavefrontSize64");
5921 if (
ID ==
"wavefront_size") {
5922 if (Header.wavefront_size == 5) {
5924 return TokError(
"wavefront_size=5 is only allowed on GFX10+");
5925 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5926 return TokError(
"wavefront_size=5 requires +WavefrontSize32");
5927 }
else if (Header.wavefront_size == 6) {
5928 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5929 return TokError(
"wavefront_size=6 requires +WavefrontSize64");
5933 if (
ID ==
"enable_wgp_mode") {
5936 return TokError(
"enable_wgp_mode=1 is only allowed on GFX10+");
5939 if (
ID ==
"enable_mem_ordered") {
5942 return TokError(
"enable_mem_ordered=1 is only allowed on GFX10+");
5945 if (
ID ==
"enable_fwd_progress") {
5948 return TokError(
"enable_fwd_progress=1 is only allowed on GFX10+");
// Parses a .amd_kernel_code_t block: loops over "key = value" entries via
// ParseAMDKernelCodeTValue until .end_amd_kernel_code_t, then emits the
// assembled header through the target streamer.
// NOTE(review): interior lines are elided in this view.
5954bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5964 if (!parseId(
ID,
"expected value identifier or .end_amd_kernel_code_t"))
// Terminator token ends the block.
5967 if (
ID ==
".end_amd_kernel_code_t")
// Each recognized key mutates the in-progress Header.
5970 if (ParseAMDKernelCodeTValue(
ID, Header))
5974 getTargetStreamer().EmitAMDKernelCodeT(Header);
// Parses the .amdgpu_hsa_kernel directive: reads the kernel symbol name,
// emits its symbol type, and (re)initializes the kernel scope for
// subsequent register-usage tracking.
// NOTE(review): interior lines are elided in this view.
5979bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5981 if (!parseId(KernelName,
"expected symbol name"))
5984 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5987 KernelScope.initialize(getContext());
// Parses the .amd_amdgpu_isa directive: rejects non-amdgcn targets,
// requires the quoted target id to match the configured one, and then
// emits the ISA version record.
// NOTE(review): interior lines are elided in this view.
5991bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5993 return Error(getLoc(),
5994 ".amd_amdgpu_isa directive is not available on non-amdgcn "
5998 auto TargetIDDirective = getLexer().getTok().getStringContents();
// Target id in the directive must agree with the command-line target.
5999 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
6000 return Error(getParser().getTok().getLoc(),
"target id must match options");
6002 getTargetStreamer().EmitISAVersion();
// Parses an HSA metadata block into a string and hands it to the target
// streamer; reports "invalid HSA metadata" if the streamer rejects it.
// NOTE(review): interior lines (the string collection) are elided in this
// view.
6008bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6011 std::string HSAMetadataString;
6016 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6017 return Error(getLoc(),
"invalid HSA metadata");
6024bool AMDGPUAsmParser::ParseToEndDirective(
const char *AssemblerDirectiveBegin,
6025 const char *AssemblerDirectiveEnd,
6026 std::string &CollectString) {
6030 getLexer().setSkipSpace(
false);
6032 bool FoundEnd =
false;
6035 CollectStream << getTokenStr();
6039 if (trySkipId(AssemblerDirectiveEnd)) {
6044 CollectStream << Parser.parseStringToEndOfStatement()
6045 << getContext().getAsmInfo()->getSeparatorString();
6047 Parser.eatToEndOfStatement();
6050 getLexer().setSkipSpace(
true);
6053 return TokError(
Twine(
"expected directive ") +
6054 Twine(AssemblerDirectiveEnd) +
Twine(
" not found"));
6057 CollectStream.flush();
6062bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6068 auto PALMetadata = getTargetStreamer().getPALMetadata();
6069 if (!PALMetadata->setFromString(
String))
6070 return Error(getLoc(),
"invalid PAL metadata");
6075bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6077 return Error(getLoc(),
6079 "not available on non-amdpal OSes")).str());
6082 auto PALMetadata = getTargetStreamer().getPALMetadata();
6083 PALMetadata->setLegacy();
6086 if (ParseAsAbsoluteExpression(Key)) {
6087 return TokError(
Twine(
"invalid value in ") +
6091 return TokError(
Twine(
"expected an even number of values in ") +
6094 if (ParseAsAbsoluteExpression(
Value)) {
6095 return TokError(
Twine(
"invalid value in ") +
6098 PALMetadata->setRegister(Key,
Value);
6107bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6108 if (getParser().checkForValidSection())
6112 SMLoc NameLoc = getLoc();
6113 if (getParser().parseIdentifier(
Name))
6114 return TokError(
"expected identifier in directive");
6117 if (getParser().parseComma())
6123 SMLoc SizeLoc = getLoc();
6124 if (getParser().parseAbsoluteExpression(
Size))
6127 return Error(SizeLoc,
"size must be non-negative");
6128 if (
Size > LocalMemorySize)
6129 return Error(SizeLoc,
"size is too large");
6131 int64_t Alignment = 4;
6133 SMLoc AlignLoc = getLoc();
6134 if (getParser().parseAbsoluteExpression(Alignment))
6137 return Error(AlignLoc,
"alignment must be a power of two");
6142 if (Alignment >= 1u << 31)
6143 return Error(AlignLoc,
"alignment is too large");
6149 Symbol->redefineIfPossible();
6150 if (!
Symbol->isUndefined())
6151 return Error(NameLoc,
"invalid symbol redefinition");
6153 getTargetStreamer().emitAMDGPULDS(Symbol,
Size,
Align(Alignment));
6157bool AMDGPUAsmParser::ParseDirective(
AsmToken DirectiveID) {
6161 if (IDVal ==
".amdhsa_kernel")
6162 return ParseDirectiveAMDHSAKernel();
6164 if (IDVal ==
".amdhsa_code_object_version")
6165 return ParseDirectiveAMDHSACodeObjectVersion();
6169 return ParseDirectiveHSAMetadata();
6171 if (IDVal ==
".amd_kernel_code_t")
6172 return ParseDirectiveAMDKernelCodeT();
6174 if (IDVal ==
".amdgpu_hsa_kernel")
6175 return ParseDirectiveAMDGPUHsaKernel();
6177 if (IDVal ==
".amd_amdgpu_isa")
6178 return ParseDirectiveISAVersion();
6182 Twine(
" directive is "
6183 "not available on non-amdhsa OSes"))
6188 if (IDVal ==
".amdgcn_target")
6189 return ParseDirectiveAMDGCNTarget();
6191 if (IDVal ==
".amdgpu_lds")
6192 return ParseDirectiveAMDGPULDS();
6195 return ParseDirectivePALMetadataBegin();
6198 return ParseDirectivePALMetadata();
6206 if (
MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6210 if (
MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6211 return hasSGPR104_SGPR105();
6214 case AMDGPU::SRC_SHARED_BASE_LO:
6215 case AMDGPU::SRC_SHARED_BASE:
6216 case AMDGPU::SRC_SHARED_LIMIT_LO:
6217 case AMDGPU::SRC_SHARED_LIMIT:
6218 case AMDGPU::SRC_PRIVATE_BASE_LO:
6219 case AMDGPU::SRC_PRIVATE_BASE:
6220 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6221 case AMDGPU::SRC_PRIVATE_LIMIT:
6223 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6226 case AMDGPU::TBA_LO:
6227 case AMDGPU::TBA_HI:
6229 case AMDGPU::TMA_LO:
6230 case AMDGPU::TMA_HI:
6232 case AMDGPU::XNACK_MASK:
6233 case AMDGPU::XNACK_MASK_LO:
6234 case AMDGPU::XNACK_MASK_HI:
6235 return (
isVI() ||
isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6236 case AMDGPU::SGPR_NULL:
6250 case AMDGPU::FLAT_SCR:
6251 case AMDGPU::FLAT_SCR_LO:
6252 case AMDGPU::FLAT_SCR_HI:
6261 if (
MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6262 return hasSGPR102_SGPR103();
6275 Res = MatchOperandParserImpl(
Operands, Mnemonic);
6287 SMLoc LBraceLoc = getLoc();
6292 auto Loc = getLoc();
6295 Error(Loc,
"expected a register");
6299 RBraceLoc = getLoc();
6304 "expected a comma or a closing square bracket"))
6308 if (
Operands.size() - Prefix > 1) {
6310 AMDGPUOperand::CreateToken(
this,
"[", LBraceLoc));
6311 Operands.push_back(AMDGPUOperand::CreateToken(
this,
"]", RBraceLoc));
6322 setForcedEncodingSize(0);
6323 setForcedDPP(
false);
6324 setForcedSDWA(
false);
6326 if (
Name.ends_with(
"_e64_dpp")) {
6328 setForcedEncodingSize(64);
6329 return Name.substr(0,
Name.size() - 8);
6330 }
else if (
Name.ends_with(
"_e64")) {
6331 setForcedEncodingSize(64);
6332 return Name.substr(0,
Name.size() - 4);
6333 }
else if (
Name.ends_with(
"_e32")) {
6334 setForcedEncodingSize(32);
6335 return Name.substr(0,
Name.size() - 4);
6336 }
else if (
Name.ends_with(
"_dpp")) {
6338 return Name.substr(0,
Name.size() - 4);
6339 }
else if (
Name.ends_with(
"_sdwa")) {
6340 setForcedSDWA(
true);
6341 return Name.substr(0,
Name.size() - 5);
6348 unsigned VariantID);
6360 Operands.push_back(AMDGPUOperand::CreateToken(
this,
Name, NameLoc));
6362 bool IsMIMG =
Name.starts_with(
"image_");
6365 OperandMode Mode = OperandMode_Default;
6367 Mode = OperandMode_NSA;
6371 checkUnsupportedInstruction(
Name, NameLoc);
6372 if (!Parser.hasPendingError()) {
6375 :
"not a valid operand.";
6376 Error(getLoc(), Msg);
6398 if (!trySkipId(
Name))
6401 Operands.push_back(AMDGPUOperand::CreateToken(
this,
Name, S));
6405ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
const char *Prefix,
6416 std::function<
bool(int64_t &)> ConvertResult) {
6424 if (ConvertResult && !ConvertResult(
Value)) {
6428 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Value, S, ImmTy));
6432ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6434 bool (*ConvertResult)(int64_t &)) {
6443 const unsigned MaxSize = 4;
6447 for (
int I = 0; ; ++
I) {
6449 SMLoc Loc = getLoc();
6453 if (
Op != 0 &&
Op != 1)
6461 if (
I + 1 == MaxSize)
6462 return Error(getLoc(),
"expected a closing square bracket");
6468 Operands.push_back(AMDGPUOperand::CreateImm(
this, Val, S, ImmTy));
6474 AMDGPUOperand::ImmTy ImmTy) {
6478 if (trySkipId(
Name)) {
6480 }
else if (trySkipId(
"no",
Name)) {
6487 return Error(S,
"r128 modifier is not supported on this GPU");
6489 return Error(S,
"a16 modifier is not supported on this GPU");
6491 if (
isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6492 ImmTy = AMDGPUOperand::ImmTyR128A16;
6494 Operands.push_back(AMDGPUOperand::CreateImm(
this, Bit, S, ImmTy));
6499 bool &Disabling)
const {
6500 Disabling =
Id.consume_front(
"no");
6520 SMLoc StringLoc = getLoc();
6522 int64_t CPolVal = 0;
6540 ResScope = parseScope(
Operands, Scope);
6555 Operands.push_back(AMDGPUOperand::CreateImm(
this, CPolVal, StringLoc,
6556 AMDGPUOperand::ImmTyCPol));
6561 SMLoc OpLoc = getLoc();
6562 unsigned Enabled = 0, Seen = 0;
6566 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6573 return Error(S,
"dlc modifier is not supported on this GPU");
6576 return Error(S,
"scc modifier is not supported on this GPU");
6579 return Error(S,
"duplicate cache policy modifier");
6591 AMDGPUOperand::CreateImm(
this,
Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6603 Res = parseStringWithPrefix(
"scope",
Value, StringLoc);
6614 if (Scope == 0xffffffff)
6615 return Error(StringLoc,
"invalid scope value");
6629 if (
Value ==
"TH_DEFAULT")
6631 else if (
Value ==
"TH_STORE_LU" ||
Value ==
"TH_LOAD_RT_WB" ||
6632 Value ==
"TH_LOAD_NT_WB") {
6633 return Error(StringLoc,
"invalid th value");
6634 }
else if (
Value.consume_front(
"TH_ATOMIC_")) {
6636 }
else if (
Value.consume_front(
"TH_LOAD_")) {
6638 }
else if (
Value.consume_front(
"TH_STORE_")) {
6641 return Error(StringLoc,
"invalid th value");
6644 if (
Value ==
"BYPASS")
6675 if (TH == 0xffffffff)
6676 return Error(StringLoc,
"invalid th value");
6683 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6684 AMDGPUOperand::ImmTy ImmT,
6686 auto i = OptionalIdx.find(ImmT);
6687 if (i != OptionalIdx.end()) {
6688 unsigned Idx = i->second;
6689 ((AMDGPUOperand &)*
Operands[
Idx]).addImmOperands(Inst, 1);
6701 StringLoc = getLoc();
6710bool AMDGPUAsmParser::tryParseFmt(
const char *Pref,
6714 SMLoc Loc = getLoc();
6716 auto Res = parseIntWithPrefix(Pref, Val);
6722 if (Val < 0 || Val > MaxVal) {
6732 AMDGPUOperand::ImmTy ImmTy) {
6733 const char *Pref =
"index_key";
6735 SMLoc Loc = getLoc();
6736 auto Res = parseIntWithPrefix(Pref, ImmVal);
6740 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6743 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6746 Operands.push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, ImmTy));
6751 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6755 return tryParseIndexKey(
Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6760ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6767 for (
int I = 0;
I < 2; ++
I) {
6768 if (Dfmt == DFMT_UNDEF && !tryParseFmt(
"dfmt", DFMT_MAX, Dfmt))
6771 if (Nfmt == NFMT_UNDEF && !tryParseFmt(
"nfmt", NFMT_MAX, Nfmt))
6776 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6782 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6785 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6786 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6792ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6797 if (!tryParseFmt(
"format", UFMT_MAX, Fmt))
6800 if (Fmt == UFMT_UNDEF)
6807bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6815 if (Format != DFMT_UNDEF) {
6821 if (Format != NFMT_UNDEF) {
6826 Error(Loc,
"unsupported format");
6837 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6842 SMLoc Loc = getLoc();
6843 if (!parseId(Str,
"expected a format string") ||
6844 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6846 if (Dfmt == DFMT_UNDEF)
6847 return Error(Loc,
"duplicate numeric format");
6848 if (Nfmt == NFMT_UNDEF)
6849 return Error(Loc,
"duplicate data format");
6852 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6853 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6857 if (Ufmt == UFMT_UNDEF)
6858 return Error(FormatLoc,
"unsupported format");
6873 if (Id == UFMT_UNDEF)
6877 return Error(Loc,
"unified format is not supported on this GPU");
6883ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6885 SMLoc Loc = getLoc();
6887 if (!parseExpr(Format))
6890 return Error(Loc,
"out of range format");
6895ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6903 SMLoc Loc = getLoc();
6904 if (!parseId(FormatStr,
"expected a format string"))
6907 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6909 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6919 return parseNumericFormat(Format);
6927 SMLoc Loc = getLoc();
6937 AMDGPUOperand::CreateImm(
this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6956 Res = parseSymbolicOrNumericFormat(Format);
6961 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*
Operands[
Size - 2]);
6962 assert(
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6969 return Error(getLoc(),
"duplicate format");
6975 parseIntWithPrefix(
"offset",
Operands, AMDGPUOperand::ImmTyOffset);
6977 Res = parseIntWithPrefix(
"inst_offset",
Operands,
6978 AMDGPUOperand::ImmTyInstOffset);
6985 parseNamedBit(
"r128",
Operands, AMDGPUOperand::ImmTyR128A16);
6987 Res = parseNamedBit(
"a16",
Operands, AMDGPUOperand::ImmTyA16);
6993 parseIntWithPrefix(
"blgp",
Operands, AMDGPUOperand::ImmTyBLGP);
6996 parseOperandArrayWithPrefix(
"neg",
Operands, AMDGPUOperand::ImmTyBLGP);
7006 OptionalImmIndexMap OptionalIdx;
7008 unsigned OperandIdx[4];
7009 unsigned EnMask = 0;
7012 for (
unsigned i = 1, e =
Operands.size(); i != e; ++i) {
7013 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
7018 OperandIdx[SrcIdx] = Inst.
size();
7019 Op.addRegOperands(Inst, 1);
7026 OperandIdx[SrcIdx] = Inst.
size();
7032 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7033 Op.addImmOperands(Inst, 1);
7037 if (
Op.isToken() && (
Op.getToken() ==
"done" ||
Op.getToken() ==
"row_en"))
7041 OptionalIdx[
Op.getImmTy()] = i;
7047 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7054 for (
auto i = 0; i < SrcIdx; ++i) {
7056 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7081 IntVal =
encode(ISA, IntVal, CntVal);
7082 if (CntVal !=
decode(ISA, IntVal)) {
7084 IntVal =
encode(ISA, IntVal, -1);
7092bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7094 SMLoc CntLoc = getLoc();
7102 SMLoc ValLoc = getLoc();
7103 if (!parseExpr(CntVal))
7111 if (CntName ==
"vmcnt" || CntName ==
"vmcnt_sat") {
7113 }
else if (CntName ==
"expcnt" || CntName ==
"expcnt_sat") {
7115 }
else if (CntName ==
"lgkmcnt" || CntName ==
"lgkmcnt_sat") {
7118 Error(CntLoc,
"invalid counter name " + CntName);
7123 Error(ValLoc,
"too large value for " + CntName);
7132 Error(getLoc(),
"expected a counter name");
7159bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7160 SMLoc FieldLoc = getLoc();
7166 SMLoc ValueLoc = getLoc();
7173 if (FieldName ==
"instid0") {
7175 }
else if (FieldName ==
"instskip") {
7177 }
else if (FieldName ==
"instid1") {
7180 Error(FieldLoc,
"invalid field name " + FieldName);
7199 .
Case(
"VALU_DEP_1", 1)
7200 .
Case(
"VALU_DEP_2", 2)
7201 .
Case(
"VALU_DEP_3", 3)
7202 .
Case(
"VALU_DEP_4", 4)
7203 .
Case(
"TRANS32_DEP_1", 5)
7204 .
Case(
"TRANS32_DEP_2", 6)
7205 .
Case(
"TRANS32_DEP_3", 7)
7206 .
Case(
"FMA_ACCUM_CYCLE_1", 8)
7207 .
Case(
"SALU_CYCLE_1", 9)
7208 .
Case(
"SALU_CYCLE_2", 10)
7209 .
Case(
"SALU_CYCLE_3", 11)
7217 Delay |=
Value << Shift;
7227 if (!parseDelay(Delay))
7231 if (!parseExpr(Delay))
7235 Operands.push_back(AMDGPUOperand::CreateImm(
this, Delay, S));
7240AMDGPUOperand::isSWaitCnt()
const {
7244bool AMDGPUOperand::isSDelayALU()
const {
return isImm(); }
7250void AMDGPUAsmParser::depCtrError(
SMLoc Loc,
int ErrorId,
7254 Error(Loc,
Twine(
"invalid counter name ", DepCtrName));
7257 Error(Loc,
Twine(DepCtrName,
" is not supported on this GPU"));
7260 Error(Loc,
Twine(
"duplicate counter name ", DepCtrName));
7263 Error(Loc,
Twine(
"invalid value for ", DepCtrName));
7270bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr,
unsigned &UsedOprMask) {
7274 SMLoc DepCtrLoc = getLoc();
7282 if (!parseExpr(ExprVal))
7285 unsigned PrevOprMask = UsedOprMask;
7286 int CntVal =
encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7289 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7298 Error(getLoc(),
"expected a counter name");
7303 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7304 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7312 SMLoc Loc = getLoc();
7315 unsigned UsedOprMask = 0;
7317 if (!parseDepCtr(DepCtr, UsedOprMask))
7321 if (!parseExpr(DepCtr))
7325 Operands.push_back(AMDGPUOperand::CreateImm(
this, DepCtr, Loc));
7329bool AMDGPUOperand::isDepCtr()
const {
return isS16Imm(); }
7335ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7337 OperandInfoTy &Width) {
7344 HwReg.Loc = getLoc();
7347 HwReg.IsSymbolic =
true;
7349 }
else if (!parseExpr(HwReg.Val,
"a register name")) {
7357 if (!skipToken(
AsmToken::Comma,
"expected a comma or a closing parenthesis"))
7361 if (!parseExpr(
Offset.Val))
7367 Width.Loc = getLoc();
7368 if (!parseExpr(Width.Val) ||
7379 SMLoc Loc = getLoc();
7381 StructuredOpField HwReg(
"id",
"hardware register", HwregId::Width,
7383 StructuredOpField
Offset(
"offset",
"bit offset", HwregOffset::Width,
7384 HwregOffset::Default);
7385 struct : StructuredOpField {
7386 using StructuredOpField::StructuredOpField;
7387 bool validate(AMDGPUAsmParser &Parser)
const override {
7389 return Error(Parser,
"only values from 1 to 32 are legal");
7392 } Width(
"size",
"bitfield width", HwregSize::Width, HwregSize::Default);
7396 Res = parseHwregFunc(HwReg,
Offset, Width);
7399 if (!validateStructuredOpFields({&HwReg, &
Offset, &Width}))
7401 ImmVal = HwregEncoding::encode(HwReg.Val,
Offset.Val, Width.Val);
7405 parseExpr(ImmVal,
"a hwreg macro, structured immediate"))
7411 if (!isUInt<16>(ImmVal))
7412 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
7414 AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7418bool AMDGPUOperand::isHwreg()
const {
7419 return isImmTy(ImmTyHwreg);
7427AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7429 OperandInfoTy &Stream) {
7435 Msg.IsSymbolic =
true;
7437 }
else if (!parseExpr(
Msg.Val,
"a message name")) {
7442 Op.IsDefined =
true;
7447 }
else if (!parseExpr(
Op.Val,
"an operation name")) {
7452 Stream.IsDefined =
true;
7453 Stream.Loc = getLoc();
7454 if (!parseExpr(Stream.Val))
7463AMDGPUAsmParser::validateSendMsg(
const OperandInfoTy &Msg,
7464 const OperandInfoTy &
Op,
7465 const OperandInfoTy &Stream) {
7475 Error(
Msg.Loc,
"specified message id is not supported on this GPU");
7480 Error(
Msg.Loc,
"invalid message id");
7486 Error(
Op.Loc,
"message does not support operations");
7488 Error(
Msg.Loc,
"missing message operation");
7493 Error(
Op.Loc,
"invalid operation id");
7498 Error(Stream.Loc,
"message operation does not support streams");
7502 Error(Stream.Loc,
"invalid message stream id");
7512 SMLoc Loc = getLoc();
7516 OperandInfoTy
Op(OP_NONE_);
7517 OperandInfoTy Stream(STREAM_ID_NONE_);
7518 if (parseSendMsgBody(Msg,
Op, Stream) &&
7519 validateSendMsg(Msg,
Op, Stream)) {
7524 }
else if (parseExpr(ImmVal,
"a sendmsg macro")) {
7525 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7526 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
7531 Operands.push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7535bool AMDGPUOperand::isSendMsg()
const {
7536 return isImmTy(ImmTySendMsg);
7557 return Error(S,
"invalid interpolation slot");
7559 Operands.push_back(AMDGPUOperand::CreateImm(
this, Slot, S,
7560 AMDGPUOperand::ImmTyInterpSlot));
7571 if (!Str.starts_with(
"attr"))
7572 return Error(S,
"invalid interpolation attribute");
7582 return Error(S,
"invalid or missing interpolation attribute channel");
7584 Str = Str.drop_back(2).drop_front(4);
7587 if (Str.getAsInteger(10, Attr))
7588 return Error(S,
"invalid or missing interpolation attribute number");
7591 return Error(S,
"out of bounds interpolation attribute number");
7595 Operands.push_back(AMDGPUOperand::CreateImm(
this, Attr, S,
7596 AMDGPUOperand::ImmTyInterpAttr));
7597 Operands.push_back(AMDGPUOperand::CreateImm(
7598 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7617 return Error(S, (
Id == ET_INVALID)
7618 ?
"invalid exp target"
7619 :
"exp target is not supported on this GPU");
7621 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Id, S,
7622 AMDGPUOperand::ImmTyExpTgt));
7637 return isId(getToken(),
Id);
7642 return getTokenKind() ==
Kind;
7645StringRef AMDGPUAsmParser::getId()
const {
7672 if (isId(
Id) && peekToken().is(Kind)) {
7682 if (isToken(Kind)) {
7692 if (!trySkipToken(Kind)) {
7693 Error(getLoc(), ErrMsg);
7704 if (Parser.parseExpression(Expr))
7707 if (Expr->evaluateAsAbsolute(Imm))
7711 Error(S,
"expected absolute expression");
7714 Twine(
" or an absolute expression"));
7724 if (Parser.parseExpression(Expr))
7728 if (Expr->evaluateAsAbsolute(IntVal)) {
7729 Operands.push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
7731 Operands.push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
7739 Val = getToken().getStringContents();
7743 Error(getLoc(), ErrMsg);
7751 Val = getTokenStr();
7755 if (!ErrMsg.
empty())
7756 Error(getLoc(), ErrMsg);
7762AMDGPUAsmParser::getToken()
const {
7763 return Parser.getTok();
7766AsmToken AMDGPUAsmParser::peekToken(
bool ShouldSkipSpace) {
7769 : getLexer().peekTok(ShouldSkipSpace);
7774 auto TokCount = getLexer().peekTokens(Tokens);
7781AMDGPUAsmParser::getTokenKind()
const {
7786AMDGPUAsmParser::getLoc()
const {
7787 return getToken().getLoc();
7791AMDGPUAsmParser::getTokenStr()
const {
7792 return getToken().getString();
7796AMDGPUAsmParser::lex() {
7801 return ((AMDGPUOperand &)*
Operands[0]).getStartLoc();
7805AMDGPUAsmParser::getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
7807 for (
unsigned i =
Operands.size() - 1; i > 0; --i) {
7808 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
7810 return Op.getStartLoc();
7816AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy
Type,
7818 auto Test = [=](
const AMDGPUOperand&
Op) {
return Op.isImmTy(
Type); };
7823AMDGPUAsmParser::getRegLoc(
unsigned Reg,
7825 auto Test = [=](
const AMDGPUOperand&
Op) {
7826 return Op.isRegKind() &&
Op.getReg() ==
Reg;
7832 bool SearchMandatoryLiterals)
const {
7833 auto Test = [](
const AMDGPUOperand&
Op) {
7834 return Op.IsImmKindLiteral() ||
Op.isExpr();
7837 if (SearchMandatoryLiterals && Loc == getInstLoc(
Operands))
7838 Loc = getMandatoryLitLoc(
Operands);
7843 auto Test = [](
const AMDGPUOperand &
Op) {
7844 return Op.IsImmKindMandatoryLiteral();
7851 auto Test = [](
const AMDGPUOperand&
Op) {
7852 return Op.isImmKindConst();
7869 SMLoc IdLoc = getLoc();
7875 find_if(Fields, [
Id](StructuredOpField *
F) {
return F->Id ==
Id; });
7876 if (
I == Fields.
end())
7877 return Error(IdLoc,
"unknown field");
7878 if ((*I)->IsDefined)
7879 return Error(IdLoc,
"duplicate field");
7882 (*I)->Loc = getLoc();
7883 if (!parseExpr((*I)->Val))
7885 (*I)->IsDefined =
true;
7892bool AMDGPUAsmParser::validateStructuredOpFields(
7894 return all_of(Fields, [
this](
const StructuredOpField *
F) {
7895 return F->validate(*
this);
7906 const unsigned OrMask,
7907 const unsigned XorMask) {
7910 return BITMASK_PERM_ENC |
7911 (AndMask << BITMASK_AND_SHIFT) |
7912 (OrMask << BITMASK_OR_SHIFT) |
7913 (XorMask << BITMASK_XOR_SHIFT);
7917AMDGPUAsmParser::parseSwizzleOperand(int64_t &
Op,
7918 const unsigned MinVal,
7919 const unsigned MaxVal,
7926 if (!parseExpr(
Op)) {
7929 if (Op < MinVal || Op > MaxVal) {
7938AMDGPUAsmParser::parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
7939 const unsigned MinVal,
7940 const unsigned MaxVal,
7943 for (
unsigned i = 0; i < OpNum; ++i) {
7944 if (!parseSwizzleOperand(
Op[i], MinVal, MaxVal, ErrMsg, Loc))
7952AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7956 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7957 "expected a 2-bit lane id")) {
7968AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7975 if (!parseSwizzleOperand(GroupSize,
7977 "group size must be in the interval [2,32]",
7982 Error(Loc,
"group size must be a power of two");
7985 if (parseSwizzleOperand(LaneIdx,
7987 "lane id must be in the interval [0,group size - 1]",
7996AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8002 if (!parseSwizzleOperand(GroupSize,
8004 "group size must be in the interval [2,32]",
8009 Error(Loc,
"group size must be a power of two");
8018AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8024 if (!parseSwizzleOperand(GroupSize,
8026 "group size must be in the interval [1,16]",
8031 Error(Loc,
"group size must be a power of two");
8040AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8048 SMLoc StrLoc = getLoc();
8049 if (!parseString(Ctl)) {
8052 if (Ctl.
size() != BITMASK_WIDTH) {
8053 Error(StrLoc,
"expected a 5-character mask");
8057 unsigned AndMask = 0;
8058 unsigned OrMask = 0;
8059 unsigned XorMask = 0;
8061 for (
size_t i = 0; i < Ctl.
size(); ++i) {
8065 Error(StrLoc,
"invalid mask");
8087AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8089 SMLoc OffsetLoc = getLoc();
8091 if (!parseExpr(Imm,
"a swizzle macro")) {
8094 if (!isUInt<16>(Imm)) {
8095 Error(OffsetLoc,
"expected a 16-bit offset");
8102AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8107 SMLoc ModeLoc = getLoc();
8110 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8111 Ok = parseSwizzleQuadPerm(Imm);
8112 }
else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8113 Ok = parseSwizzleBitmaskPerm(Imm);
8114 }
else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8115 Ok = parseSwizzleBroadcast(Imm);
8116 }
else if (trySkipId(IdSymbolic[ID_SWAP])) {
8117 Ok = parseSwizzleSwap(Imm);
8118 }
else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8119 Ok = parseSwizzleReverse(Imm);
8121 Error(ModeLoc,
"expected a swizzle mode");
8124 return Ok && skipToken(
AsmToken::RParen,
"expected a closing parentheses");
8134 if (trySkipId(
"offset")) {
8138 if (trySkipId(
"swizzle")) {
8139 Ok = parseSwizzleMacro(Imm);
8141 Ok = parseSwizzleOffset(Imm);
8145 Operands.push_back(AMDGPUOperand::CreateImm(
this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8153AMDGPUOperand::isSwizzle()
const {
8154 return isImmTy(ImmTySwizzle);
8161int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8175 for (
unsigned ModeId = ID_MIN; ModeId <=
ID_MAX; ++ModeId) {
8176 if (trySkipId(IdSymbolic[ModeId])) {
8183 Error(S, (Imm == 0)?
8184 "expected a VGPR index mode or a closing parenthesis" :
8185 "expected a VGPR index mode");
8190 Error(S,
"duplicate VGPR index mode");
8198 "expected a comma or a closing parenthesis"))
8213 Imm = parseGPRIdxMacro();
8217 if (getParser().parseAbsoluteExpression(Imm))
8219 if (Imm < 0 || !isUInt<4>(Imm))
8220 return Error(S,
"invalid immediate: only 4-bit values are legal");
8224 AMDGPUOperand::CreateImm(
this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8228bool AMDGPUOperand::isGPRIdxMode()
const {
8229 return isImmTy(ImmTyGprIdxMode);
8241 if (isRegister() || isModifier())
8253 if (
Opr.isExpr() && !
Opr.isSymbolRefExpr()) {
8254 Error(Loc,
"expected an absolute expression or a label");
8255 }
else if (
Opr.isImm() && !
Opr.isS16Imm()) {
8256 Error(Loc,
"expected a 16-bit signed jump offset");
8274void AMDGPUAsmParser::cvtMubufImpl(
MCInst &Inst,
8277 OptionalImmIndexMap OptionalIdx;
8278 unsigned FirstOperandIdx = 1;
8279 bool IsAtomicReturn =
false;
8286 for (
unsigned i = FirstOperandIdx, e =
Operands.size(); i != e; ++i) {
8287 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
8291 Op.addRegOperands(Inst, 1);
8295 if (IsAtomicReturn && i == FirstOperandIdx)
8296 Op.addRegOperands(Inst, 1);
8301 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8302 Op.addImmOperands(Inst, 1);
8314 OptionalIdx[
Op.getImmTy()] = i;
8325bool AMDGPUOperand::isSMRDOffset8()
const {
8326 return isImmLiteral() && isUInt<8>(getImm());
8329bool AMDGPUOperand::isSMEMOffset()
const {
8331 return isImmLiteral();
8334bool AMDGPUOperand::isSMRDLiteralOffset()
const {
8337 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8369bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8370 if (BoundCtrl == 0 || BoundCtrl == 1) {
8378void AMDGPUAsmParser::onBeginOfFile() {
8379 if (!getParser().getStreamer().getTargetStreamer() ||
8383 if (!getTargetStreamer().getTargetID())
8384 getTargetStreamer().initializeTargetID(getSTI(),
8385 getSTI().getFeatureString());
8388 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8396bool AMDGPUAsmParser::parsePrimaryExpr(
const MCExpr *&Res,
SMLoc &EndLoc) {
8402 .
Case(
"max", AGVK::AGVK_Max)
8403 .
Case(
"or", AGVK::AGVK_Or)
8413 if (Exprs.
empty()) {
8414 Error(getToken().getLoc(),
8415 "empty " +
Twine(TokenId) +
" expression");
8418 if (CommaCount + 1 != Exprs.
size()) {
8419 Error(getToken().getLoc(),
8420 "mismatch of commas in " +
Twine(TokenId) +
" expression");
8427 if (getParser().parseExpression(Expr, EndLoc))
8431 if (LastTokenWasComma)
8434 Error(getToken().getLoc(),
8435 "unexpected token in " +
Twine(TokenId) +
" expression");
8441 return getParser().parsePrimaryExpr(Res, EndLoc,
nullptr);
8446 if (
Name ==
"mul") {
8447 return parseIntWithPrefix(
"mul",
Operands,
8451 if (
Name ==
"div") {
8452 return parseIntWithPrefix(
"div",
Operands,
8468 const int Ops[] = { AMDGPU::OpName::src0,
8469 AMDGPU::OpName::src1,
8470 AMDGPU::OpName::src2 };
8485 if (
DstOp.isReg() &&
8486 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(
DstOp.
getReg())) {
8490 if ((OpSel & (1 << SrcNum)) != 0)
8496void AMDGPUAsmParser::cvtVOP3OpSel(
MCInst &Inst,
8503 OptionalImmIndexMap &OptionalIdx) {
8504 cvtVOP3P(Inst,
Operands, OptionalIdx);
8513 &&
Desc.NumOperands > (OpNum + 1)
8515 &&
Desc.operands()[OpNum + 1].RegClass != -1
8517 &&
Desc.getOperandConstraint(OpNum + 1,
8518 MCOI::OperandConstraint::TIED_TO) == -1;
8523 OptionalImmIndexMap OptionalIdx;
8528 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
8529 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
8532 for (
unsigned E =
Operands.size();
I != E; ++
I) {
8533 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
8535 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8536 }
else if (
Op.isInterpSlot() ||
Op.isInterpAttr() ||
8537 Op.isInterpAttrChan()) {
8539 }
else if (
Op.isImmModifier()) {
8540 OptionalIdx[
Op.getImmTy()] =
I;
8548 AMDGPUOperand::ImmTyHigh);
8552 AMDGPUOperand::ImmTyClampSI);
8556 AMDGPUOperand::ImmTyOModSI);
8561 OptionalImmIndexMap OptionalIdx;
8566 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
8567 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
8570 for (
unsigned E =
Operands.size();
I != E; ++
I) {
8571 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
8573 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8574 }
else if (
Op.isImmModifier()) {
8575 OptionalIdx[
Op.getImmTy()] =
I;
8592 const int Ops[] = { AMDGPU::OpName::src0,
8593 AMDGPU::OpName::src1,
8594 AMDGPU::OpName::src2 };
8595 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8596 AMDGPU::OpName::src1_modifiers,
8597 AMDGPU::OpName::src2_modifiers };
8601 for (
int J = 0; J < 3; ++J) {
8609 if ((OpSel & (1 << J)) != 0)
8611 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8612 (OpSel & (1 << 3)) != 0)
8620 OptionalImmIndexMap &OptionalIdx) {
8625 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
8626 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
8629 for (
unsigned E =
Operands.size();
I != E; ++
I) {
8630 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
8632 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8633 }
else if (
Op.isImmModifier()) {
8634 OptionalIdx[
Op.getImmTy()] =
I;
8635 }
else if (
Op.isRegOrImm()) {
8636 Op.addRegOrImmOperands(Inst, 1);
8644 AMDGPUOperand::ImmTyClampSI);
8648 AMDGPUOperand::ImmTyOModSI);
8655 auto it = Inst.
begin();
8665 OptionalImmIndexMap OptionalIdx;
8666 cvtVOP3(Inst,
Operands, OptionalIdx);
8670 OptionalImmIndexMap &OptIdx) {
8676 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8677 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8678 Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 ||
8679 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) {
8687 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8688 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8689 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8690 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) {
8699 if (OpSelIdx != -1) {
8704 if (OpSelHiIdx != -1) {
8718 const int Ops[] = { AMDGPU::OpName::src0,
8719 AMDGPU::OpName::src1,
8720 AMDGPU::OpName::src2 };
8721 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8722 AMDGPU::OpName::src1_modifiers,
8723 AMDGPU::OpName::src2_modifiers };
8726 unsigned OpSelHi = 0;
8733 if (OpSelHiIdx != -1)
8742 for (
int J = 0; J < 3; ++J) {
8755 if (
SrcOp.isReg() && getMRI()
8762 if ((OpSel & (1 << J)) != 0)
8766 if ((OpSelHi & (1 << J)) != 0)
8769 if ((NegLo & (1 << J)) != 0)
8772 if ((NegHi & (1 << J)) != 0)
8780 OptionalImmIndexMap OptIdx;
8786 unsigned i,
unsigned Opc,
unsigned OpName) {
8788 ((AMDGPUOperand &)*
Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8790 ((AMDGPUOperand &)*
Operands[i]).addRegOperands(Inst, 1);
8796 ((AMDGPUOperand &)*
Operands[1]).addRegOperands(Inst, 1);
8799 ((AMDGPUOperand &)*
Operands[1]).addRegOperands(Inst, 1);
8800 ((AMDGPUOperand &)*
Operands[4]).addRegOperands(Inst, 1);
8802 OptionalImmIndexMap OptIdx;
8803 for (
unsigned i = 5; i <
Operands.size(); ++i) {
8804 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[i]);
8805 OptIdx[
Op.getImmTy()] = i;
8810 AMDGPUOperand::ImmTyIndexKey8bit);
8814 AMDGPUOperand::ImmTyIndexKey16bit);
8834 Operands.push_back(AMDGPUOperand::CreateToken(
this,
"::", S));
8835 SMLoc OpYLoc = getLoc();
8838 Operands.push_back(AMDGPUOperand::CreateToken(
this, OpYName, OpYLoc));
8841 return Error(OpYLoc,
"expected a VOPDY instruction after ::");
8848 auto addOp = [&](
uint16_t ParsedOprIdx) {
8849 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[ParsedOprIdx]);
8851 Op.addRegOperands(Inst, 1);
8855 Op.addImmOperands(Inst, 1);
8867 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8871 const auto &CInfo = InstInfo[CompIdx];
8872 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8873 for (
unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8874 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8875 if (CInfo.hasSrc2Acc())
8876 addOp(CInfo.getIndexOfDstInParsedOperands());
8884bool AMDGPUOperand::isDPP8()
const {
8885 return isImmTy(ImmTyDPP8);
8888bool AMDGPUOperand::isDPPCtrl()
const {
8889 using namespace AMDGPU::DPP;
8891 bool result =
isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8893 int64_t
Imm = getImm();
8894 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8895 (
Imm >= DppCtrl::ROW_SHL_FIRST &&
Imm <= DppCtrl::ROW_SHL_LAST) ||
8896 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8897 (
Imm >= DppCtrl::ROW_ROR_FIRST &&
Imm <= DppCtrl::ROW_ROR_LAST) ||
8898 (Imm == DppCtrl::WAVE_SHL1) ||
8899 (
Imm == DppCtrl::WAVE_ROL1) ||
8900 (Imm == DppCtrl::WAVE_SHR1) ||
8901 (
Imm == DppCtrl::WAVE_ROR1) ||
8902 (Imm == DppCtrl::ROW_MIRROR) ||
8903 (
Imm == DppCtrl::ROW_HALF_MIRROR) ||
8904 (Imm == DppCtrl::BCAST15) ||
8905 (
Imm == DppCtrl::BCAST31) ||
8906 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8907 (
Imm >= DppCtrl::ROW_XMASK_FIRST &&
Imm <= DppCtrl::ROW_XMASK_LAST);
8916bool AMDGPUOperand::isBLGP()
const {
8917 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8920bool AMDGPUOperand::isCBSZ()
const {
8921 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8924bool AMDGPUOperand::isABID()
const {
8925 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8928bool AMDGPUOperand::isS16Imm()
const {
8929 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8932bool AMDGPUOperand::isU16Imm()
const {
8933 return isImmLiteral() && isUInt<16>(getImm());
8940bool AMDGPUAsmParser::parseDimId(
unsigned &Encoding) {
8945 SMLoc Loc = getToken().getEndLoc();
8946 Token = std::string(getTokenStr());
8948 if (getLoc() != Loc)
8953 if (!parseId(Suffix))
8979 SMLoc Loc = getLoc();
8980 if (!parseDimId(Encoding))
8981 return Error(Loc,
"invalid dim value");
8983 Operands.push_back(AMDGPUOperand::CreateImm(
this, Encoding, S,
8984 AMDGPUOperand::ImmTyDim));
9002 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
9005 for (
size_t i = 0; i < 8; ++i) {
9009 SMLoc Loc = getLoc();
9010 if (getParser().parseAbsoluteExpression(Sels[i]))
9012 if (0 > Sels[i] || 7 < Sels[i])
9013 return Error(Loc,
"expected a 3-bit value");
9020 for (
size_t i = 0; i < 8; ++i)
9021 DPP8 |= (Sels[i] << (i * 3));
9023 Operands.push_back(AMDGPUOperand::CreateImm(
this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9028AMDGPUAsmParser::isSupportedDPPCtrl(
StringRef Ctrl,
9030 if (Ctrl ==
"row_newbcast")
9033 if (Ctrl ==
"row_share" ||
9034 Ctrl ==
"row_xmask")
9037 if (Ctrl ==
"wave_shl" ||
9038 Ctrl ==
"wave_shr" ||
9039 Ctrl ==
"wave_rol" ||
9040 Ctrl ==
"wave_ror" ||
9041 Ctrl ==
"row_bcast")
9044 return Ctrl ==
"row_mirror" ||
9045 Ctrl ==
"row_half_mirror" ||
9046 Ctrl ==
"quad_perm" ||
9047 Ctrl ==
"row_shl" ||
9048 Ctrl ==
"row_shr" ||
9053AMDGPUAsmParser::parseDPPCtrlPerm() {
9056 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
9060 for (
int i = 0; i < 4; ++i) {
9065 SMLoc Loc = getLoc();
9066 if (getParser().parseAbsoluteExpression(Temp))
9068 if (Temp < 0 || Temp > 3) {
9069 Error(Loc,
"expected a 2-bit value");
9073 Val += (Temp << i * 2);
9083AMDGPUAsmParser::parseDPPCtrlSel(
StringRef Ctrl) {
9084 using namespace AMDGPU::DPP;
9089 SMLoc Loc = getLoc();
9091 if (getParser().parseAbsoluteExpression(Val))
9094 struct DppCtrlCheck {
9101 .
Case(
"wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9102 .Case(
"wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9103 .Case(
"wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9104 .Case(
"wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9105 .Case(
"row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9106 .Case(
"row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9107 .Case(
"row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9108 .Case(
"row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9109 .Case(
"row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9110 .Case(
"row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9114 if (
Check.Ctrl == -1) {
9115 Valid = (
Ctrl ==
"row_bcast" && (Val == 15 || Val == 31));
9116 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9131 using namespace AMDGPU::DPP;
9134 !isSupportedDPPCtrl(getTokenStr(),
Operands))
9143 if (Ctrl ==
"row_mirror") {
9144 Val = DppCtrl::ROW_MIRROR;
9145 }
else if (Ctrl ==
"row_half_mirror") {
9146 Val = DppCtrl::ROW_HALF_MIRROR;
9149 if (Ctrl ==
"quad_perm") {
9150 Val = parseDPPCtrlPerm();
9152 Val = parseDPPCtrlSel(Ctrl);
9161 AMDGPUOperand::CreateImm(
this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9167 OptionalImmIndexMap OptionalIdx;
9177 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9181 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9182 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9186 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9190 if (OldIdx == NumOperands) {
9192 constexpr int DST_IDX = 0;
9194 }
else if (Src2ModIdx == NumOperands) {
9205 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
9206 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 ||
9207 Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
9208 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12;
9209 if (IsVOP3CvtSrDpp) {
9223 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9225 if (IsDPP8 &&
Op.isDppFI()) {
9228 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9229 }
else if (
Op.isReg()) {
9230 Op.addRegOperands(Inst, 1);
9231 }
else if (
Op.isImm() &&
9233 assert(!
Op.IsImmKindLiteral() &&
"Cannot use literal with DPP");
9234 Op.addImmOperands(Inst, 1);
9235 }
else if (
Op.isImm()) {
9236 OptionalIdx[
Op.getImmTy()] =
I;
9248 cvtVOP3P(Inst,
Operands, OptionalIdx);
9250 cvtVOP3OpSel(Inst,
Operands, OptionalIdx);
9267 AMDGPUOperand::ImmTyDppFI);
9272 OptionalImmIndexMap OptionalIdx;
9276 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9277 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9281 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9289 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9291 if (
Op.isReg() && validateVccOperand(
Op.getReg())) {
9299 Op.addImmOperands(Inst, 1);
9301 Op.addRegWithFPInputModsOperands(Inst, 2);
9302 }
else if (
Op.isDppFI()) {
9304 }
else if (
Op.isReg()) {
9305 Op.addRegOperands(Inst, 1);
9311 Op.addRegWithFPInputModsOperands(Inst, 2);
9312 }
else if (
Op.isReg()) {
9313 Op.addRegOperands(Inst, 1);
9314 }
else if (
Op.isDPPCtrl()) {
9315 Op.addImmOperands(Inst, 1);
9316 }
else if (
Op.isImm()) {
9318 OptionalIdx[
Op.getImmTy()] =
I;
9334 AMDGPUOperand::ImmTyDppFI);
9345 AMDGPUOperand::ImmTy
Type) {
9358 .
Case(
"BYTE_0", SdwaSel::BYTE_0)
9359 .
Case(
"BYTE_1", SdwaSel::BYTE_1)
9360 .
Case(
"BYTE_2", SdwaSel::BYTE_2)
9361 .
Case(
"BYTE_3", SdwaSel::BYTE_3)
9362 .
Case(
"WORD_0", SdwaSel::WORD_0)
9363 .
Case(
"WORD_1", SdwaSel::WORD_1)
9364 .
Case(
"DWORD", SdwaSel::DWORD)
9367 if (
Int == 0xffffffff)
9368 return Error(StringLoc,
"invalid " +
Twine(Prefix) +
" value");
9387 .
Case(
"UNUSED_PAD", DstUnused::UNUSED_PAD)
9388 .
Case(
"UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9389 .
Case(
"UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9392 if (
Int == 0xffffffff)
9393 return Error(StringLoc,
"invalid dst_unused value");
9395 Operands.push_back(AMDGPUOperand::CreateImm(
this,
Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9425 OptionalImmIndexMap OptionalIdx;
9426 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9427 bool SkippedVcc =
false;
9431 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9432 ((AMDGPUOperand &)*
Operands[
I++]).addRegOperands(Inst, 1);
9435 for (
unsigned E =
Operands.size();
I != E; ++
I) {
9436 AMDGPUOperand &
Op = ((AMDGPUOperand &)*
Operands[
I]);
9437 if (SkipVcc && !SkippedVcc &&
Op.isReg() &&
9438 (
Op.getReg() == AMDGPU::VCC ||
Op.getReg() == AMDGPU::VCC_LO)) {
9456 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9457 }
else if (
Op.isImm()) {
9459 OptionalIdx[
Op.getImmTy()] =
I;
9467 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9468 Opc != AMDGPU::V_NOP_sdwa_vi) {
9470 switch (BasicInstType) {
9474 AMDGPUOperand::ImmTyClampSI, 0);
9478 AMDGPUOperand::ImmTyOModSI, 0);
9482 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9486 AMDGPUOperand::ImmTySDWADstUnused,
9487 DstUnused::UNUSED_PRESERVE);
9512 llvm_unreachable(
"Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9518 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9519 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9520 auto it = Inst.
begin();
9533#define GET_REGISTER_MATCHER
9534#define GET_MATCHER_IMPLEMENTATION
9535#define GET_MNEMONIC_SPELL_CHECKER
9536#define GET_MNEMONIC_CHECKER
9537#include "AMDGPUGenAsmMatcher.inc"
9543 return parseTokenOp(
"addr64",
Operands);
9545 return parseTokenOp(
"done",
Operands);
9547 return parseTokenOp(
"idxen",
Operands);
9549 return parseTokenOp(
"lds",
Operands);
9551 return parseTokenOp(
"offen",
Operands);
9553 return parseTokenOp(
"off",
Operands);
9555 return parseTokenOp(
"row_en",
Operands);
9557 return parseNamedBit(
"gds",
Operands, AMDGPUOperand::ImmTyGDS);
9559 return parseNamedBit(
"tfe",
Operands, AMDGPUOperand::ImmTyTFE);
9561 return tryCustomParseOperand(
Operands, MCK);
9572 AMDGPUOperand &Operand = (AMDGPUOperand&)
Op;
9575 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9577 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9579 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9581 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9583 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9585 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9593 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9595 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9596 case MCK_SOPPBrTarget:
9597 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9598 case MCK_VReg32OrOff:
9599 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9600 case MCK_InterpSlot:
9601 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9602 case MCK_InterpAttr:
9603 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9604 case MCK_InterpAttrChan:
9605 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9607 case MCK_SReg_64_XEXEC:
9613 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9615 return Match_InvalidOperand;
9627 if (!parseExpr(Imm)) {
9632 if (!isUInt<16>(Imm))
9633 return Error(S,
"expected a 16-bit value");
9636 AMDGPUOperand::CreateImm(
this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9640bool AMDGPUOperand::isEndpgm()
const {
return isImmTy(ImmTyEndpgm); }
9646bool AMDGPUOperand::isWaitVDST()
const {
9647 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9650bool AMDGPUOperand::isWaitVAVDst()
const {
9651 return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
9654bool AMDGPUOperand::isWaitVMVSrc()
const {
9655 return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
9662bool AMDGPUOperand::isWaitEXP()
const {
9663 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9670bool AMDGPUOperand::isSplitBarrier()
const {
return isInlinableImm(MVT::i32); }
unsigned const MachineRegisterInfo * MRI
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static unsigned getSpecialRegForName(StringRef RegName)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, unsigned OpName)
static bool IsRevOpcode(const unsigned Opcode)
static int getRegClass(RegisterKind Is, unsigned RegWidth)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, const MCRegisterInfo *MRI)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
#define LLVM_EXTERNAL_VISIBILITY
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static unsigned getOperandSize(MachineInstr &MI, unsigned Idx, MachineRegisterInfo &MRI)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
#define G_00B848_FWD_PROGRESS(x)
#define G_00B848_MEM_ORDERED(x)
#define G_00B848_IEEE_MODE(x)
#define G_00B848_DX10_CLAMP(x)
#define G_00B848_WGP_MODE(x)
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
unsigned unsigned DefaultVal
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
support::ulittle16_t & Lo
support::ulittle16_t & Hi
static const AMDGPUVariadicMCExpr * create(VariadicKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
Target independent representation for an assembler token.
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
TokenKind getKind() const
This class represents an Operation in the Expression.
Base class for user error types.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or a Error.
Class representing an expression and its matching format.
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
MCAsmParser & getParser()
Generic assembler parser interface, for use by target specific assembly parsers.
virtual MCStreamer & getStreamer()=0
Return the output streamer for the assembler.
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCRegisterInfo * getRegisterInfo() const
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
const MCSubtargetInfo * getSubtargetInfo() const
Base class for the full range of assembler expressions which are needed for parsing.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
iterator insert(iterator I, const MCOperand &Op)
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
bool mayStore() const
Return true if this instruction could possibly modify memory.
Interface to description of machine instruction set.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Instances of this class represent operands of the MCInst class.
static MCOperand createReg(unsigned Reg)
static MCOperand createExpr(const MCExpr *Val)
void setReg(unsigned Reg)
Set the register number.
static MCOperand createImm(int64_t Val)
unsigned getReg() const
Returns the register number.
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
virtual bool isReg() const =0
isReg - Is this a register operand?
virtual bool isMem() const =0
isMem - Is this a memory operand?
virtual MCRegister getReg() const =0
virtual bool isToken() const =0
isToken - Is this a token operand?
virtual bool isImm() const =0
isImm - Is this an immediate operand?
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
unsigned getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
static constexpr unsigned NoRegister
Streaming machine code generation interface.
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
MCTargetStreamer * getTargetStreamer()
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
FeatureBitset ToggleFeature(uint64_t FB)
Toggle a feature and return the re-computed feature bits.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void setVariableValue(const MCExpr *Value)
MCTargetAsmParser - Generic interface to target specific assembly parsers.
MCSubtargetInfo & copySTI()
Create a copy of STI and return a non-const reference to it.
virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
virtual bool ParseDirective(AsmToken DirectiveID)
ParseDirective - Parse a target specific assembler directive This method is deprecated,...
virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
tryParseRegister - parse one register if possible
void setAvailableFeatures(const FeatureBitset &Value)
const MCSubtargetInfo & getSTI() const
virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind)
Allow a target to add special case operand matching for things that tblgen doesn't/can't handle effec...
virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands)=0
ParseInstruction - Parse one assembly instruction.
virtual unsigned checkTargetMatchPredicate(MCInst &Inst)
checkTargetMatchPredicate - Validate the instruction match against any complex target predicates not ...
virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm)=0
MatchAndEmitInstruction - Recognize a series of operands of a parsed instruction as an actual MCInst ...
Target specific streamer interface.
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
Wrapper class representing virtual and physical registers.
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
constexpr const char * getPointer() const
constexpr bool isValid() const
Represents a range in source code.
Implements a dense probed hash-table based set with some number of buckets stored inline.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringMapEntry - This is used to represent one value that is inserted into a StringMap.
StringRef - Represent a constant reference to a string, i.e.
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringSet - A wrapper for StringMap that provides set-like functionality.
bool contains(StringRef key) const
Check if the set contains the given key.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
const CustomOperand< const MCSubtargetInfo & > Opr[]
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
int64_t getMsgOpId(int64_t MsgId, const StringRef Name)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
const CustomOperand< const MCSubtargetInfo & > Msg[]
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned COMPONENTS[]
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool isGFX9(const MCSubtargetInfo &STI)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_V2INT32
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
@ OPERAND_REG_IMM_BF16_DEFERRED
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_AC_BF16
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_INT32
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_AC_V2BF16
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isVI(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ UNDEF
UNDEF - An undefined node.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Reg
All possible values of the reg field in the ModR/M byte.
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
void PrintError(const Twine &Msg)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Target & getTheR600Target()
The target for R600 GPUs.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Target & getTheGCNTarget()
The target for GCN GPUs.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool parseAmdKernelCodeField(StringRef ID, MCAsmParser &Parser, amd_kernel_code_t &C, raw_ostream &Err)
AMD Kernel Code Object (amd_kernel_code_t).
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
const MCExpr * compute_pgm_rsrc1
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * group_segment_fixed_size
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * kernel_code_properties
Represents the counter values to wait for in an s_waitcnt instruction.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static const fltSemantics & IEEEhalf() LLVM_READNONE
static const fltSemantics & BFloat() LLVM_READNONE
opStatus
IEEE-754R 7: Default exception handling.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
uint32_t group_segment_fixed_size
uint32_t private_segment_fixed_size