/// Register categories used by the parser. The enumerators keep their
/// implicit values, starting at 0 for IS_UNKNOWN.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
71 SMLoc StartLoc, EndLoc;
72 const AMDGPUAsmParser *AsmParser;
75 AMDGPUOperand(KindTy Kind_,
const AMDGPUAsmParser *AsmParser_)
76 : Kind(Kind_), AsmParser(AsmParser_) {}
78 using Ptr = std::unique_ptr<AMDGPUOperand>;
86 bool hasFPModifiers()
const {
return Abs || Neg; }
87 bool hasIntModifiers()
const {
return Sext; }
88 bool hasModifiers()
const {
return hasFPModifiers() || hasIntModifiers(); }
89 bool isForcedLit()
const {
return Lit == LitModifier::Lit; }
90 bool isForcedLit64()
const {
return Lit == LitModifier::Lit64; }
92 int64_t getFPModifiersOperand()
const {
99 int64_t getIntModifiersOperand()
const {
105 int64_t getModifiersOperand()
const {
106 assert(!(hasFPModifiers() && hasIntModifiers())
107 &&
"fp and int modifiers should not be used simultaneously");
108 if (hasFPModifiers())
109 return getFPModifiersOperand();
110 if (hasIntModifiers())
111 return getIntModifiersOperand();
115 friend raw_ostream &
operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
189 ImmTyMatrixAScaleFmt,
190 ImmTyMatrixBScaleFmt,
223 mutable int MCOpIdx = -1;
226 bool isToken()
const override {
return Kind == Token; }
228 bool isSymbolRefExpr()
const {
232 bool isImm()
const override {
233 return Kind == Immediate;
236 bool isInlinableImm(MVT type)
const;
237 bool isLiteralImm(MVT type)
const;
239 bool isRegKind()
const {
240 return Kind == Register;
243 bool isReg()
const override {
244 return isRegKind() && !hasModifiers();
247 bool isRegOrInline(
unsigned RCID, MVT type)
const {
248 return isRegClass(RCID) || isInlinableImm(type);
252 return isRegOrInline(RCID, type) || isLiteralImm(type);
255 bool isRegOrImmWithInt16InputMods()
const {
259 template <
bool IsFake16>
bool isRegOrImmWithIntT16InputMods()
const {
261 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
264 bool isRegOrImmWithInt32InputMods()
const {
268 bool isRegOrInlineImmWithInt16InputMods()
const {
269 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
272 template <
bool IsFake16>
bool isRegOrInlineImmWithIntT16InputMods()
const {
273 return isRegOrInline(
274 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
277 bool isRegOrInlineImmWithInt32InputMods()
const {
278 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
281 bool isRegOrImmWithInt64InputMods()
const {
285 bool isRegOrImmWithFP16InputMods()
const {
289 template <
bool IsFake16>
bool isRegOrImmWithFPT16InputMods()
const {
291 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
294 bool isRegOrImmWithFP32InputMods()
const {
298 bool isRegOrImmWithFP64InputMods()
const {
302 template <
bool IsFake16>
bool isRegOrInlineImmWithFP16InputMods()
const {
303 return isRegOrInline(
304 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
307 bool isRegOrInlineImmWithFP32InputMods()
const {
308 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
311 bool isRegOrInlineImmWithFP64InputMods()
const {
312 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
315 bool isVRegWithInputMods(
unsigned RCID)
const {
return isRegClass(RCID); }
317 bool isVRegWithFP32InputMods()
const {
318 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
321 bool isVRegWithFP64InputMods()
const {
322 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
325 bool isPackedFP16InputMods()
const {
329 bool isPackedVGPRFP32InputMods()
const {
333 bool isVReg()
const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
345 bool isVReg32()
const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
349 bool isVReg32OrOff()
const {
350 return isOff() || isVReg32();
354 return isRegKind() &&
getReg() == AMDGPU::SGPR_NULL;
357 bool isAV_LdSt_32_Align2_RegOp()
const {
358 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
359 isRegClass(AMDGPU::AGPR_32RegClassID);
362 bool isVRegWithInputMods()
const;
363 template <
bool IsFake16>
bool isT16_Lo128VRegWithInputMods()
const;
364 template <
bool IsFake16>
bool isT16VRegWithInputMods()
const;
366 bool isSDWAOperand(MVT type)
const;
367 bool isSDWAFP16Operand()
const;
368 bool isSDWAFP32Operand()
const;
369 bool isSDWAInt16Operand()
const;
370 bool isSDWAInt32Operand()
const;
372 bool isImmTy(ImmTy ImmT)
const {
373 return isImm() &&
Imm.Type == ImmT;
376 template <ImmTy Ty>
bool isImmTy()
const {
return isImmTy(Ty); }
378 bool isImmLiteral()
const {
return isImmTy(ImmTyNone); }
380 bool isImmModifier()
const {
381 return isImm() &&
Imm.Type != ImmTyNone;
384 bool isOModSI()
const {
return isImmTy(ImmTyOModSI); }
385 bool isDim()
const {
return isImmTy(ImmTyDim); }
386 bool isR128A16()
const {
return isImmTy(ImmTyR128A16); }
387 bool isOff()
const {
return isImmTy(ImmTyOff); }
388 bool isExpTgt()
const {
return isImmTy(ImmTyExpTgt); }
389 bool isOffen()
const {
return isImmTy(ImmTyOffen); }
390 bool isIdxen()
const {
return isImmTy(ImmTyIdxen); }
391 bool isAddr64()
const {
return isImmTy(ImmTyAddr64); }
392 bool isSMEMOffsetMod()
const {
return isImmTy(ImmTySMEMOffsetMod); }
393 bool isFlatOffset()
const {
return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
394 bool isGDS()
const {
return isImmTy(ImmTyGDS); }
395 bool isLDS()
const {
return isImmTy(ImmTyLDS); }
396 bool isCPol()
const {
return isImmTy(ImmTyCPol); }
397 bool isIndexKey8bit()
const {
return isImmTy(ImmTyIndexKey8bit); }
398 bool isIndexKey16bit()
const {
return isImmTy(ImmTyIndexKey16bit); }
399 bool isIndexKey32bit()
const {
return isImmTy(ImmTyIndexKey32bit); }
400 bool isMatrixAFMT()
const {
return isImmTy(ImmTyMatrixAFMT); }
401 bool isMatrixBFMT()
const {
return isImmTy(ImmTyMatrixBFMT); }
402 bool isMatrixAScale()
const {
return isImmTy(ImmTyMatrixAScale); }
403 bool isMatrixBScale()
const {
return isImmTy(ImmTyMatrixBScale); }
404 bool isMatrixAScaleFmt()
const {
return isImmTy(ImmTyMatrixAScaleFmt); }
405 bool isMatrixBScaleFmt()
const {
return isImmTy(ImmTyMatrixBScaleFmt); }
406 bool isMatrixAReuse()
const {
return isImmTy(ImmTyMatrixAReuse); }
407 bool isMatrixBReuse()
const {
return isImmTy(ImmTyMatrixBReuse); }
408 bool isTFE()
const {
return isImmTy(ImmTyTFE); }
409 bool isFORMAT()
const {
return isImmTy(ImmTyFORMAT) &&
isUInt<7>(
getImm()); }
410 bool isDppFI()
const {
return isImmTy(ImmTyDppFI); }
411 bool isSDWADstSel()
const {
return isImmTy(ImmTySDWADstSel); }
412 bool isSDWASrc0Sel()
const {
return isImmTy(ImmTySDWASrc0Sel); }
413 bool isSDWASrc1Sel()
const {
return isImmTy(ImmTySDWASrc1Sel); }
414 bool isSDWADstUnused()
const {
return isImmTy(ImmTySDWADstUnused); }
415 bool isInterpSlot()
const {
return isImmTy(ImmTyInterpSlot); }
416 bool isInterpAttr()
const {
return isImmTy(ImmTyInterpAttr); }
417 bool isInterpAttrChan()
const {
return isImmTy(ImmTyInterpAttrChan); }
418 bool isOpSel()
const {
return isImmTy(ImmTyOpSel); }
419 bool isOpSelHi()
const {
return isImmTy(ImmTyOpSelHi); }
420 bool isNegLo()
const {
return isImmTy(ImmTyNegLo); }
421 bool isNegHi()
const {
return isImmTy(ImmTyNegHi); }
422 bool isBitOp3()
const {
return isImmTy(ImmTyBitOp3) &&
isUInt<8>(
getImm()); }
423 bool isDone()
const {
return isImmTy(ImmTyDone); }
424 bool isRowEn()
const {
return isImmTy(ImmTyRowEn); }
426 bool isRegOrImm()
const {
427 return isReg() || isImm();
430 bool isRegClass(
unsigned RCID)
const;
434 bool isRegOrInlineNoMods(
unsigned RCID, MVT type)
const {
435 return isRegOrInline(RCID, type) && !hasModifiers();
438 bool isSCSrcB16()
const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
442 bool isSCSrcV2B16()
const {
446 bool isSCSrc_b32()
const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
450 bool isSCSrc_b64()
const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
454 bool isBoolReg()
const;
456 bool isSCSrcF16()
const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
460 bool isSCSrcV2F16()
const {
464 bool isSCSrcF32()
const {
465 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
468 bool isSCSrcF64()
const {
469 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
472 bool isSSrc_b32()
const {
473 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
476 bool isSSrc_b16()
const {
return isSCSrcB16() || isLiteralImm(MVT::i16); }
478 bool isSSrcV2B16()
const {
483 bool isSSrc_b64()
const {
486 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
487 (((
const MCTargetAsmParser *)AsmParser)
488 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
492 bool isSSrc_f32()
const {
493 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
496 bool isSSrcF64()
const {
return isSCSrc_b64() || isLiteralImm(MVT::f64); }
498 bool isSSrc_bf16()
const {
return isSCSrcB16() || isLiteralImm(MVT::bf16); }
500 bool isSSrc_f16()
const {
return isSCSrcB16() || isLiteralImm(MVT::f16); }
502 bool isSSrcV2F16()
const {
507 bool isSSrcV2FP32()
const {
512 bool isSCSrcV2FP32()
const {
517 bool isSSrcV2INT32()
const {
522 bool isSCSrcV2INT32()
const {
524 return isSCSrc_b32();
527 bool isSSrcOrLds_b32()
const {
528 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
529 isLiteralImm(MVT::i32) || isExpr();
532 bool isVCSrc_b32()
const {
533 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
536 bool isVCSrc_b32_Lo256()
const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
540 bool isVCSrc_b64_Lo256()
const {
541 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
544 bool isVCSrc_b64()
const {
545 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
548 bool isVCSrcT_b16()
const {
549 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
552 bool isVCSrcTB16_Lo128()
const {
553 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
556 bool isVCSrcFake16B16_Lo128()
const {
557 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
560 bool isVCSrc_b16()
const {
561 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
564 bool isVCSrc_v2b16()
const {
return isVCSrc_b16(); }
566 bool isVCSrc_f32()
const {
567 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
570 bool isVCSrc_f64()
const {
571 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
574 bool isVCSrcTBF16()
const {
575 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
578 bool isVCSrcT_f16()
const {
579 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
582 bool isVCSrcT_bf16()
const {
583 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
586 bool isVCSrcTBF16_Lo128()
const {
587 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
590 bool isVCSrcTF16_Lo128()
const {
591 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
594 bool isVCSrcFake16BF16_Lo128()
const {
595 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
598 bool isVCSrcFake16F16_Lo128()
const {
599 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
602 bool isVCSrc_bf16()
const {
603 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
606 bool isVCSrc_f16()
const {
607 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
610 bool isVCSrc_v2bf16()
const {
return isVCSrc_bf16(); }
612 bool isVCSrc_v2f16()
const {
return isVCSrc_f16(); }
614 bool isVSrc_b32()
const {
615 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
618 bool isVSrc_b64()
const {
return isVCSrc_f64() || isLiteralImm(MVT::i64); }
620 bool isVSrcT_b16()
const {
return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
622 bool isVSrcT_b16_Lo128()
const {
623 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
626 bool isVSrcFake16_b16_Lo128()
const {
627 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
630 bool isVSrc_b16()
const {
return isVCSrc_b16() || isLiteralImm(MVT::i16); }
632 bool isVSrc_v2b16()
const {
return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
634 bool isVCSrcV2FP32()
const {
return isVCSrc_f64(); }
636 bool isVSrc_v2f32()
const {
return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
638 bool isVCSrc_v2b32()
const {
return isVCSrc_b64(); }
640 bool isVSrc_v2b32()
const {
return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
642 bool isVSrc_f32()
const {
643 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
646 bool isVSrc_f64()
const {
return isVCSrc_f64() || isLiteralImm(MVT::f64); }
648 bool isVSrcT_bf16()
const {
return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
650 bool isVSrcT_f16()
const {
return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
652 bool isVSrcT_bf16_Lo128()
const {
653 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
656 bool isVSrcT_f16_Lo128()
const {
657 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
660 bool isVSrcFake16_bf16_Lo128()
const {
661 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
664 bool isVSrcFake16_f16_Lo128()
const {
665 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
668 bool isVSrc_bf16()
const {
return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
670 bool isVSrc_f16()
const {
return isVCSrc_f16() || isLiteralImm(MVT::f16); }
672 bool isVSrc_v2bf16()
const {
673 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
676 bool isVSrc_v2f16()
const {
return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
678 bool isVSrc_v2f16_splat()
const {
return isVSrc_v2f16(); }
680 bool isVSrc_NoInline_v2f16()
const {
return isVSrc_v2f16(); }
682 bool isVISrcB32()
const {
683 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
686 bool isVISrcB16()
const {
687 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
690 bool isVISrcV2B16()
const {
694 bool isVISrcF32()
const {
695 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
698 bool isVISrcF16()
const {
699 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
702 bool isVISrcV2F16()
const {
703 return isVISrcF16() || isVISrcB32();
706 bool isVISrc_64_bf16()
const {
707 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
710 bool isVISrc_64_f16()
const {
711 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
714 bool isVISrc_64_b32()
const {
715 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
718 bool isVISrc_64B64()
const {
719 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
722 bool isVISrc_64_f64()
const {
723 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
726 bool isVISrc_64V2FP32()
const {
727 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
730 bool isVISrc_64V2INT32()
const {
731 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
734 bool isVISrc_256_b32()
const {
735 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
738 bool isVISrc_256_f32()
const {
739 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
742 bool isVISrc_256B64()
const {
743 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
746 bool isVISrc_256_f64()
const {
747 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
750 bool isVISrc_512_f64()
const {
751 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
754 bool isVISrc_128B16()
const {
755 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
758 bool isVISrc_128V2B16()
const {
759 return isVISrc_128B16();
762 bool isVISrc_128_b32()
const {
763 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
766 bool isVISrc_128_f32()
const {
767 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
770 bool isVISrc_256V2FP32()
const {
771 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
774 bool isVISrc_256V2INT32()
const {
775 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
778 bool isVISrc_512_b32()
const {
779 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
782 bool isVISrc_512B16()
const {
783 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
786 bool isVISrc_512V2B16()
const {
787 return isVISrc_512B16();
790 bool isVISrc_512_f32()
const {
791 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
794 bool isVISrc_512F16()
const {
795 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
798 bool isVISrc_512V2F16()
const {
799 return isVISrc_512F16() || isVISrc_512_b32();
802 bool isVISrc_1024_b32()
const {
803 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
806 bool isVISrc_1024B16()
const {
807 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
810 bool isVISrc_1024V2B16()
const {
811 return isVISrc_1024B16();
814 bool isVISrc_1024_f32()
const {
815 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
818 bool isVISrc_1024F16()
const {
819 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
822 bool isVISrc_1024V2F16()
const {
823 return isVISrc_1024F16() || isVISrc_1024_b32();
826 bool isAISrcB32()
const {
827 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
830 bool isAISrcB16()
const {
831 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
834 bool isAISrcV2B16()
const {
838 bool isAISrcF32()
const {
839 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
842 bool isAISrcF16()
const {
843 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
846 bool isAISrcV2F16()
const {
847 return isAISrcF16() || isAISrcB32();
850 bool isAISrc_64B64()
const {
851 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
854 bool isAISrc_64_f64()
const {
855 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
858 bool isAISrc_128_b32()
const {
859 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
862 bool isAISrc_128B16()
const {
863 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
866 bool isAISrc_128V2B16()
const {
867 return isAISrc_128B16();
870 bool isAISrc_128_f32()
const {
871 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
874 bool isAISrc_128F16()
const {
875 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
878 bool isAISrc_128V2F16()
const {
879 return isAISrc_128F16() || isAISrc_128_b32();
882 bool isVISrc_128_bf16()
const {
883 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
886 bool isVISrc_128_f16()
const {
887 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
890 bool isVISrc_128V2F16()
const {
891 return isVISrc_128_f16() || isVISrc_128_b32();
894 bool isAISrc_256B64()
const {
895 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
898 bool isAISrc_256_f64()
const {
899 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
902 bool isAISrc_512_b32()
const {
903 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
906 bool isAISrc_512B16()
const {
907 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
910 bool isAISrc_512V2B16()
const {
911 return isAISrc_512B16();
914 bool isAISrc_512_f32()
const {
915 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
918 bool isAISrc_512F16()
const {
919 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
922 bool isAISrc_512V2F16()
const {
923 return isAISrc_512F16() || isAISrc_512_b32();
926 bool isAISrc_1024_b32()
const {
927 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
930 bool isAISrc_1024B16()
const {
931 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
934 bool isAISrc_1024V2B16()
const {
935 return isAISrc_1024B16();
938 bool isAISrc_1024_f32()
const {
939 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
942 bool isAISrc_1024F16()
const {
943 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
946 bool isAISrc_1024V2F16()
const {
947 return isAISrc_1024F16() || isAISrc_1024_b32();
950 bool isKImmFP32()
const {
951 return isLiteralImm(MVT::f32);
954 bool isKImmFP16()
const {
955 return isLiteralImm(MVT::f16);
958 bool isKImmFP64()
const {
return isLiteralImm(MVT::f64); }
960 bool isMem()
const override {
964 bool isExpr()
const {
965 return Kind == Expression;
968 bool isSOPPBrTarget()
const {
return isExpr() || isImm(); }
970 bool isSWaitCnt()
const;
971 bool isDepCtr()
const;
972 bool isSDelayALU()
const;
973 bool isHwreg()
const;
974 bool isSendMsg()
const;
975 bool isWaitEvent()
const;
976 bool isSplitBarrier()
const;
977 bool isSwizzle()
const;
978 bool isSMRDOffset8()
const;
979 bool isSMEMOffset()
const;
980 bool isSMRDLiteralOffset()
const;
982 bool isDPPCtrl()
const;
984 bool isGPRIdxMode()
const;
985 bool isS16Imm()
const;
986 bool isU16Imm()
const;
987 bool isEndpgm()
const;
989 auto getPredicate(std::function<
bool(
const AMDGPUOperand &
Op)>
P)
const {
990 return [
this,
P]() {
return P(*
this); };
995 return StringRef(Tok.Data, Tok.Length);
1003 void setImm(int64_t Val) {
1008 ImmTy getImmTy()
const {
1013 MCRegister
getReg()
const override {
1018 SMLoc getStartLoc()
const override {
1022 SMLoc getEndLoc()
const override {
1026 SMRange getLocRange()
const {
1027 return SMRange(StartLoc, EndLoc);
1030 int getMCOpIdx()
const {
return MCOpIdx; }
1032 Modifiers getModifiers()
const {
1033 assert(isRegKind() || isImmTy(ImmTyNone));
1034 return isRegKind() ?
Reg.Mods :
Imm.Mods;
1037 void setModifiers(Modifiers Mods) {
1038 assert(isRegKind() || isImmTy(ImmTyNone));
1045 bool hasModifiers()
const {
1046 return getModifiers().hasModifiers();
1049 bool hasFPModifiers()
const {
1050 return getModifiers().hasFPModifiers();
1053 bool hasIntModifiers()
const {
1054 return getModifiers().hasIntModifiers();
1057 bool isForcedLit()
const {
1058 return isImmLiteral() && getModifiers().isForcedLit();
1061 bool isForcedLit64()
const {
1062 return isImmLiteral() && getModifiers().isForcedLit64();
1065 uint64_t applyInputFPModifiers(uint64_t Val,
unsigned Size)
const;
1067 void addImmOperands(MCInst &Inst,
unsigned N,
bool ApplyModifiers =
true)
const;
1069 void addLiteralImmOperand(MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const;
1071 void addRegOperands(MCInst &Inst,
unsigned N)
const;
1073 void addRegOrImmOperands(MCInst &Inst,
unsigned N)
const {
1075 addRegOperands(Inst,
N);
1077 addImmOperands(Inst,
N);
1080 void addRegOrImmWithInputModsOperands(MCInst &Inst,
unsigned N)
const {
1081 Modifiers Mods = getModifiers();
1084 addRegOperands(Inst,
N);
1086 addImmOperands(Inst,
N,
false);
1090 void addRegOrImmWithFPInputModsOperands(MCInst &Inst,
unsigned N)
const {
1091 assert(!hasIntModifiers());
1092 addRegOrImmWithInputModsOperands(Inst,
N);
1095 void addRegOrImmWithIntInputModsOperands(MCInst &Inst,
unsigned N)
const {
1096 assert(!hasFPModifiers());
1097 addRegOrImmWithInputModsOperands(Inst,
N);
1100 void addRegWithInputModsOperands(MCInst &Inst,
unsigned N)
const {
1101 Modifiers Mods = getModifiers();
1104 addRegOperands(Inst,
N);
1107 void addRegWithFPInputModsOperands(MCInst &Inst,
unsigned N)
const {
1108 assert(!hasIntModifiers());
1109 addRegWithInputModsOperands(Inst,
N);
1112 void addRegWithIntInputModsOperands(MCInst &Inst,
unsigned N)
const {
1113 assert(!hasFPModifiers());
1114 addRegWithInputModsOperands(Inst,
N);
1117 static void printImmTy(raw_ostream& OS, ImmTy
Type) {
1120 case ImmTyNone: OS <<
"None";
break;
1121 case ImmTyGDS: OS <<
"GDS";
break;
1122 case ImmTyLDS: OS <<
"LDS";
break;
1123 case ImmTyOffen: OS <<
"Offen";
break;
1124 case ImmTyIdxen: OS <<
"Idxen";
break;
1125 case ImmTyAddr64: OS <<
"Addr64";
break;
1126 case ImmTyOffset: OS <<
"Offset";
break;
1127 case ImmTyInstOffset: OS <<
"InstOffset";
break;
1128 case ImmTyOffset0: OS <<
"Offset0";
break;
1129 case ImmTyOffset1: OS <<
"Offset1";
break;
1130 case ImmTySMEMOffsetMod: OS <<
"SMEMOffsetMod";
break;
1131 case ImmTyCPol: OS <<
"CPol";
break;
1132 case ImmTyIndexKey8bit: OS <<
"index_key";
break;
1133 case ImmTyIndexKey16bit: OS <<
"index_key";
break;
1134 case ImmTyIndexKey32bit: OS <<
"index_key";
break;
1135 case ImmTyTFE: OS <<
"TFE";
break;
1136 case ImmTyIsAsync: OS <<
"IsAsync";
break;
1137 case ImmTyD16: OS <<
"D16";
break;
1138 case ImmTyFORMAT: OS <<
"FORMAT";
break;
1139 case ImmTyClamp: OS <<
"Clamp";
break;
1140 case ImmTyOModSI: OS <<
"OModSI";
break;
1141 case ImmTyDPP8: OS <<
"DPP8";
break;
1142 case ImmTyDppCtrl: OS <<
"DppCtrl";
break;
1143 case ImmTyDppRowMask: OS <<
"DppRowMask";
break;
1144 case ImmTyDppBankMask: OS <<
"DppBankMask";
break;
1145 case ImmTyDppBoundCtrl: OS <<
"DppBoundCtrl";
break;
1146 case ImmTyDppFI: OS <<
"DppFI";
break;
1147 case ImmTySDWADstSel: OS <<
"SDWADstSel";
break;
1148 case ImmTySDWASrc0Sel: OS <<
"SDWASrc0Sel";
break;
1149 case ImmTySDWASrc1Sel: OS <<
"SDWASrc1Sel";
break;
1150 case ImmTySDWADstUnused: OS <<
"SDWADstUnused";
break;
1151 case ImmTyDMask: OS <<
"DMask";
break;
1152 case ImmTyDim: OS <<
"Dim";
break;
1153 case ImmTyUNorm: OS <<
"UNorm";
break;
1154 case ImmTyDA: OS <<
"DA";
break;
1155 case ImmTyR128A16: OS <<
"R128A16";
break;
1156 case ImmTyA16: OS <<
"A16";
break;
1157 case ImmTyLWE: OS <<
"LWE";
break;
1158 case ImmTyOff: OS <<
"Off";
break;
1159 case ImmTyExpTgt: OS <<
"ExpTgt";
break;
1160 case ImmTyExpCompr: OS <<
"ExpCompr";
break;
1161 case ImmTyExpVM: OS <<
"ExpVM";
break;
1162 case ImmTyDone: OS <<
"Done";
break;
1163 case ImmTyRowEn: OS <<
"RowEn";
break;
1164 case ImmTyHwreg: OS <<
"Hwreg";
break;
1165 case ImmTySendMsg: OS <<
"SendMsg";
break;
1166 case ImmTyWaitEvent: OS <<
"WaitEvent";
break;
1167 case ImmTyInterpSlot: OS <<
"InterpSlot";
break;
1168 case ImmTyInterpAttr: OS <<
"InterpAttr";
break;
1169 case ImmTyInterpAttrChan: OS <<
"InterpAttrChan";
break;
1170 case ImmTyOpSel: OS <<
"OpSel";
break;
1171 case ImmTyOpSelHi: OS <<
"OpSelHi";
break;
1172 case ImmTyNegLo: OS <<
"NegLo";
break;
1173 case ImmTyNegHi: OS <<
"NegHi";
break;
1174 case ImmTySwizzle: OS <<
"Swizzle";
break;
1175 case ImmTyGprIdxMode: OS <<
"GprIdxMode";
break;
1176 case ImmTyHigh: OS <<
"High";
break;
1177 case ImmTyBLGP: OS <<
"BLGP";
break;
1178 case ImmTyCBSZ: OS <<
"CBSZ";
break;
1179 case ImmTyABID: OS <<
"ABID";
break;
1180 case ImmTyEndpgm: OS <<
"Endpgm";
break;
1181 case ImmTyWaitVDST: OS <<
"WaitVDST";
break;
1182 case ImmTyWaitEXP: OS <<
"WaitEXP";
break;
1183 case ImmTyWaitVAVDst: OS <<
"WaitVAVDst";
break;
1184 case ImmTyWaitVMVSrc: OS <<
"WaitVMVSrc";
break;
1185 case ImmTyBitOp3: OS <<
"BitOp3";
break;
1186 case ImmTyMatrixAFMT: OS <<
"ImmTyMatrixAFMT";
break;
1187 case ImmTyMatrixBFMT: OS <<
"ImmTyMatrixBFMT";
break;
1188 case ImmTyMatrixAScale: OS <<
"ImmTyMatrixAScale";
break;
1189 case ImmTyMatrixBScale: OS <<
"ImmTyMatrixBScale";
break;
1190 case ImmTyMatrixAScaleFmt: OS <<
"ImmTyMatrixAScaleFmt";
break;
1191 case ImmTyMatrixBScaleFmt: OS <<
"ImmTyMatrixBScaleFmt";
break;
1192 case ImmTyMatrixAReuse: OS <<
"ImmTyMatrixAReuse";
break;
1193 case ImmTyMatrixBReuse: OS <<
"ImmTyMatrixBReuse";
break;
1194 case ImmTyScaleSel: OS <<
"ScaleSel" ;
break;
1195 case ImmTyByteSel: OS <<
"ByteSel" ;
break;
1200 void print(raw_ostream &OS,
const MCAsmInfo &MAI)
const override {
1204 <<
" mods: " <<
Reg.Mods <<
'>';
1208 if (getImmTy() != ImmTyNone) {
1209 OS <<
" type: "; printImmTy(OS, getImmTy());
1211 OS <<
" mods: " <<
Imm.Mods <<
'>';
1224 static AMDGPUOperand::Ptr CreateImm(
const AMDGPUAsmParser *AsmParser,
1225 int64_t Val, SMLoc Loc,
1226 ImmTy
Type = ImmTyNone,
1227 bool IsFPImm =
false) {
1228 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1230 Op->Imm.IsFPImm = IsFPImm;
1232 Op->Imm.Mods = Modifiers();
1238 static AMDGPUOperand::Ptr CreateToken(
const AMDGPUAsmParser *AsmParser,
1239 StringRef Str, SMLoc Loc,
1240 bool HasExplicitEncodingSize =
true) {
1241 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1242 Res->Tok.Data = Str.data();
1243 Res->Tok.Length = Str.size();
1244 Res->StartLoc = Loc;
1249 static AMDGPUOperand::Ptr CreateReg(
const AMDGPUAsmParser *AsmParser,
1250 MCRegister
Reg, SMLoc S, SMLoc
E) {
1251 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1252 Op->Reg.RegNo =
Reg;
1253 Op->Reg.Mods = Modifiers();
1259 static AMDGPUOperand::Ptr CreateExpr(
const AMDGPUAsmParser *AsmParser,
1260 const class MCExpr *Expr, SMLoc S) {
1261 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1270 OS <<
"abs:" << Mods.Abs <<
" neg: " << Mods.Neg <<
" sext:" << Mods.Sext;
1279#define GET_REGISTER_MATCHER
1280#include "AMDGPUGenAsmMatcher.inc"
1281#undef GET_REGISTER_MATCHER
1282#undef GET_SUBTARGET_FEATURE_NAME
1287class KernelScopeInfo {
1288 int SgprIndexUnusedMin = -1;
1289 int VgprIndexUnusedMin = -1;
1290 int AgprIndexUnusedMin = -1;
1294 void usesSgprAt(
int i) {
1295 if (i >= SgprIndexUnusedMin) {
1296 SgprIndexUnusedMin = ++i;
1299 Ctx->getOrCreateSymbol(
Twine(
".kernel.sgpr_count"));
1305 void usesVgprAt(
int i) {
1306 if (i >= VgprIndexUnusedMin) {
1307 VgprIndexUnusedMin = ++i;
1310 Ctx->getOrCreateSymbol(
Twine(
".kernel.vgpr_count"));
1312 VgprIndexUnusedMin);
1318 void usesAgprAt(
int i) {
1323 if (i >= AgprIndexUnusedMin) {
1324 AgprIndexUnusedMin = ++i;
1327 Ctx->getOrCreateSymbol(
Twine(
".kernel.agpr_count"));
1332 Ctx->getOrCreateSymbol(
Twine(
".kernel.vgpr_count"));
1334 VgprIndexUnusedMin);
1341 KernelScopeInfo() =
default;
1345 MSTI = Ctx->getSubtargetInfo();
1347 usesSgprAt(SgprIndexUnusedMin = -1);
1348 usesVgprAt(VgprIndexUnusedMin = -1);
1350 usesAgprAt(AgprIndexUnusedMin = -1);
1354 void usesRegister(RegisterKind RegKind,
unsigned DwordRegIndex,
1355 unsigned RegWidth) {
1358 usesSgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1361 usesAgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1364 usesVgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1373 MCAsmParser &Parser;
1375 unsigned ForcedEncodingSize = 0;
1376 bool ForcedDPP =
false;
1377 bool ForcedSDWA =
false;
1378 KernelScopeInfo KernelScope;
1379 const unsigned HwMode;
1384#define GET_ASSEMBLER_HEADER
1385#include "AMDGPUGenAsmMatcher.inc"
1390 unsigned getRegOperandSize(
const MCInstrDesc &
Desc,
unsigned OpNo)
const {
1392 int16_t RCID = MII.getOpRegClassID(
Desc.operands()[OpNo], HwMode);
1396 std::optional<AMDGPU::InfoSectionData> InfoData;
1399 void createConstantSymbol(StringRef Id, int64_t Val);
1401 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1402 bool OutOfRangeError(SMRange
Range);
1418 bool calculateGPRBlocks(
const FeatureBitset &Features,
const MCExpr *VCCUsed,
1419 const MCExpr *FlatScrUsed,
bool XNACKUsed,
1420 std::optional<bool> EnableWavefrontSize32,
1421 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1422 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1423 const MCExpr *&VGPRBlocks,
const MCExpr *&SGPRBlocks);
1424 bool ParseDirectiveAMDGCNTarget();
1425 bool ParseDirectiveAMDHSACodeObjectVersion();
1426 bool ParseDirectiveAMDHSAKernel();
1427 bool ParseAMDKernelCodeTValue(StringRef
ID, AMDGPUMCKernelCodeT &Header);
1428 bool ParseDirectiveAMDKernelCodeT();
1430 bool subtargetHasRegister(
const MCRegisterInfo &MRI, MCRegister
Reg);
1431 bool ParseDirectiveAMDGPUHsaKernel();
1433 bool ParseDirectiveISAVersion();
1434 bool ParseDirectiveHSAMetadata();
1435 bool ParseDirectivePALMetadataBegin();
1436 bool ParseDirectivePALMetadata();
1437 bool ParseDirectiveAMDGPULDS();
1438 bool ParseDirectiveAMDGPUInfo();
1442 bool ParseToEndDirective(
const char *AssemblerDirectiveBegin,
1443 const char *AssemblerDirectiveEnd,
1444 std::string &CollectString);
1446 bool AddNextRegisterToList(MCRegister &
Reg,
unsigned &RegWidth,
1447 RegisterKind RegKind, MCRegister Reg1,
1448 RegisterKind RegKind1, SMLoc Loc);
1449 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &
Reg,
1450 unsigned &RegNum,
unsigned &RegWidth,
1451 bool RestoreOnFailure =
false);
1452 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &
Reg,
1453 unsigned &RegNum,
unsigned &RegWidth,
1454 SmallVectorImpl<AsmToken> &Tokens);
1455 MCRegister ParseRegularReg(RegisterKind &RegKind,
unsigned &RegNum,
1457 SmallVectorImpl<AsmToken> &Tokens);
1458 MCRegister ParseSpecialReg(RegisterKind &RegKind,
unsigned &RegNum,
1460 SmallVectorImpl<AsmToken> &Tokens);
1461 MCRegister ParseRegList(RegisterKind &RegKind,
unsigned &RegNum,
1463 SmallVectorImpl<AsmToken> &Tokens);
1464 bool ParseRegRange(
unsigned &Num,
unsigned &Width,
unsigned &SubReg);
1465 MCRegister getRegularReg(RegisterKind RegKind,
unsigned RegNum,
1466 unsigned SubReg,
unsigned RegWidth, SMLoc Loc);
1469 bool isRegister(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1470 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1471 void initializeGprCountSymbol(RegisterKind RegKind);
1472 bool updateGprCountSymbols(RegisterKind RegKind,
unsigned DwordRegIndex,
1474 void cvtMubufImpl(MCInst &Inst,
const OperandVector &Operands,
1479 OperandMode_Default,
1483 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1485 AMDGPUAsmParser(
const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1486 const MCInstrInfo &MII)
1487 : MCTargetAsmParser(STI, MII), Parser(_Parser),
1488 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1491 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1495 createConstantSymbol(
".amdgcn.gfx_generation_number",
ISA.Major);
1496 createConstantSymbol(
".amdgcn.gfx_generation_minor",
ISA.Minor);
1497 createConstantSymbol(
".amdgcn.gfx_generation_stepping",
ISA.Stepping);
1499 createConstantSymbol(
".option.machine_version_major",
ISA.Major);
1500 createConstantSymbol(
".option.machine_version_minor",
ISA.Minor);
1501 createConstantSymbol(
".option.machine_version_stepping",
ISA.Stepping);
1504 initializeGprCountSymbol(IS_VGPR);
1505 initializeGprCountSymbol(IS_SGPR);
1510 createConstantSymbol(Symbol, Code);
1512 createConstantSymbol(
"UC_VERSION_W64_BIT", 0x2000);
1513 createConstantSymbol(
"UC_VERSION_W32_BIT", 0x4000);
1514 createConstantSymbol(
"UC_VERSION_MDP_BIT", 0x8000);
// Returns the Feature_isWave32Bit entry of the parser's currently available
// feature set (as reported by getAvailableFeatures()).
1592 bool isWave32()
const {
return getAvailableFeatures()[Feature_isWave32Bit]; }
// Returns the Feature_isWave64Bit entry of the parser's currently available
// feature set (as reported by getAvailableFeatures()).
1594 bool isWave64()
const {
return getAvailableFeatures()[Feature_isWave64Bit]; }
1596 bool hasInv2PiInlineImm()
const {
1597 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1600 bool has64BitLiterals()
const {
1601 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1604 bool hasFlatOffsets()
const {
1605 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1608 bool hasTrue16Insts()
const {
1609 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1613 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1616 bool hasSGPR102_SGPR103()
const {
// Returns isGFX10Plus() unchanged.
// NOTE(review): judging by the name, this gates use of SGPR104/SGPR105; the
// call sites are not visible here -- confirm.
1620 bool hasSGPR104_SGPR105()
const {
return isGFX10Plus(); }
1622 bool hasIntClamp()
const {
1623 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1626 bool hasPartialNSAEncoding()
const {
1627 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1630 bool hasGloballyAddressableScratch()
const {
1631 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1644 AMDGPUTargetStreamer &getTargetStreamer() {
1645 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1646 return static_cast<AMDGPUTargetStreamer &
>(TS);
1652 return const_cast<AMDGPUAsmParser *
>(
this)->MCTargetAsmParser::getContext();
1655 const MCRegisterInfo *getMRI()
const {
1659 const MCInstrInfo *getMII()
const {
1665 const FeatureBitset &getFeatureBits()
const {
1666 return getSTI().getFeatureBits();
// Stores Size in ForcedEncodingSize. The only value given a meaning in the
// visible code is 64, which isForcedVOP3() tests for.
1669 void setForcedEncodingSize(
unsigned Size) { ForcedEncodingSize =
Size; }
// Stores ForceDPP_ in the ForcedDPP flag; read back through isForcedDPP().
1670 void setForcedDPP(
bool ForceDPP_) { ForcedDPP = ForceDPP_; }
// Stores ForceSDWA_ in the ForcedSDWA flag; read back through isForcedSDWA().
1671 void setForcedSDWA(
bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
// Returns the current ForcedEncodingSize value (member-initialized to 0).
1673 unsigned getForcedEncodingSize()
const {
return ForcedEncodingSize; }
// True exactly when ForcedEncodingSize equals 64.
1674 bool isForcedVOP3()
const {
return ForcedEncodingSize == 64; }
// Returns the ForcedDPP flag (member-initialized to false, changed via
// setForcedDPP()).
1675 bool isForcedDPP()
const {
return ForcedDPP; }
// Returns the ForcedSDWA flag (member-initialized to false, changed via
// setForcedSDWA()).
1676 bool isForcedSDWA()
const {
return ForcedSDWA; }
1677 ArrayRef<unsigned> getMatchedVariants()
const;
1678 StringRef getMatchedVariantName()
const;
1680 std::unique_ptr<AMDGPUOperand> parseRegister(
bool RestoreOnFailure =
false);
1681 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1682 bool RestoreOnFailure);
1683 bool parseRegister(MCRegister &
Reg, SMLoc &StartLoc, SMLoc &EndLoc)
override;
1684 ParseStatus tryParseRegister(MCRegister &
Reg, SMLoc &StartLoc,
1685 SMLoc &EndLoc)
override;
1686 unsigned checkTargetMatchPredicate(MCInst &Inst)
override;
1687 unsigned validateTargetOperandClass(MCParsedAsmOperand &
Op,
1688 unsigned Kind)
override;
1689 bool matchAndEmitInstruction(SMLoc IDLoc,
unsigned &Opcode,
1691 uint64_t &ErrorInfo,
1692 bool MatchingInlineAsm)
override;
1693 bool ParseDirective(AsmToken DirectiveID)
override;
1694 void onEndOfFile()
override;
1695 ParseStatus parseOperand(
OperandVector &Operands, StringRef Mnemonic,
1696 OperandMode
Mode = OperandMode_Default);
1697 StringRef parseMnemonicSuffix(StringRef Name);
1698 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1702 ParseStatus parseTokenOp(StringRef Name,
OperandVector &Operands);
1704 ParseStatus parseIntWithPrefix(
const char *Prefix, int64_t &
Int);
1707 parseIntWithPrefix(
const char *Prefix,
OperandVector &Operands,
1708 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1709 std::function<
bool(int64_t &)> ConvertResult =
nullptr);
1711 ParseStatus parseOperandArrayWithPrefix(
1713 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1714 bool (*ConvertResult)(int64_t &) =
nullptr);
1718 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1719 bool IgnoreNegative =
false);
1720 unsigned getCPolKind(StringRef Id, StringRef Mnemo,
bool &Disabling)
const;
1722 ParseStatus parseScope(
OperandVector &Operands, int64_t &Scope);
1724 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &
Value,
1726 ParseStatus parseStringOrIntWithPrefix(
OperandVector &Operands,
1728 ArrayRef<const char *> Ids,
1730 ParseStatus parseStringOrIntWithPrefix(
OperandVector &Operands,
1732 ArrayRef<const char *> Ids,
1733 AMDGPUOperand::ImmTy
Type);
1736 bool isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1737 bool isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1738 bool isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1739 bool isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1740 bool parseSP3NegModifier();
1741 ParseStatus parseImm(
OperandVector &Operands,
bool HasSP3AbsModifier =
false,
1744 ParseStatus parseRegOrImm(
OperandVector &Operands,
bool HasSP3AbsMod =
false,
1746 ParseStatus parseRegOrImmWithFPInputMods(
OperandVector &Operands,
1747 bool AllowImm =
true);
1748 ParseStatus parseRegOrImmWithIntInputMods(
OperandVector &Operands,
1749 bool AllowImm =
true);
1750 ParseStatus parseRegWithFPInputMods(
OperandVector &Operands);
1751 ParseStatus parseRegWithIntInputMods(
OperandVector &Operands);
1754 AMDGPUOperand::ImmTy ImmTy);
1758 ParseStatus tryParseMatrixFMT(
OperandVector &Operands, StringRef Name,
1759 AMDGPUOperand::ImmTy
Type);
1762 ParseStatus tryParseMatrixScale(
OperandVector &Operands, StringRef Name,
1763 AMDGPUOperand::ImmTy
Type);
1766 ParseStatus tryParseMatrixScaleFmt(
OperandVector &Operands, StringRef Name,
1767 AMDGPUOperand::ImmTy
Type);
1771 ParseStatus parseDfmtNfmt(int64_t &
Format);
1772 ParseStatus parseUfmt(int64_t &
Format);
1773 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1775 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1778 ParseStatus parseSymbolicOrNumericFormat(int64_t &
Format);
1779 ParseStatus parseNumericFormat(int64_t &
Format);
1783 bool tryParseFmt(
const char *Pref, int64_t MaxVal, int64_t &Val);
1784 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1788 bool parseCnt(int64_t &IntVal);
1791 bool parseDepCtr(int64_t &IntVal,
unsigned &Mask);
1792 void depCtrError(SMLoc Loc,
int ErrorId, StringRef DepCtrName);
1795 bool parseDelay(int64_t &Delay);
1801 struct OperandInfoTy {
1804 bool IsSymbolic =
false;
1805 bool IsDefined =
false;
1807 constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
1810 struct StructuredOpField : OperandInfoTy {
1814 bool IsDefined =
false;
1816 constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1817 unsigned Width, int64_t
Default)
1818 : OperandInfoTy(
Default), Id(Id), Desc(Desc), Width(Width) {}
1819 virtual ~StructuredOpField() =
default;
1821 bool Error(AMDGPUAsmParser &Parser,
const Twine &Err)
const {
1822 Parser.Error(Loc,
"invalid " + Desc +
": " + Err);
1826 virtual bool validate(AMDGPUAsmParser &Parser)
const {
1828 return Error(Parser,
"not supported on this GPU");
1830 return Error(Parser,
"only " + Twine(Width) +
"-bit values are legal");
1838 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &
Op, OperandInfoTy &Stream);
1839 bool validateSendMsg(
const OperandInfoTy &Msg,
1840 const OperandInfoTy &
Op,
1841 const OperandInfoTy &Stream);
1843 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &
Offset,
1844 OperandInfoTy &Width);
1846 const AMDGPUOperand &findMCOperand(
const OperandVector &Operands,
1849 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1851 SMLoc getFlatOffsetLoc(
const OperandVector &Operands)
const;
1852 SMLoc getSMEMOffsetLoc(
const OperandVector &Operands)
const;
1855 SMLoc getOperandLoc(
const OperandVector &Operands,
int MCOpIdx)
const;
1856 SMLoc getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
1858 SMLoc getImmLoc(AMDGPUOperand::ImmTy
Type,
1862 bool validateInstruction(
const MCInst &Inst, SMLoc IDLoc,
1864 bool validateOffset(
const MCInst &Inst,
const OperandVector &Operands);
1865 bool validateFlatOffset(
const MCInst &Inst,
const OperandVector &Operands);
1866 bool validateSMEMOffset(
const MCInst &Inst,
const OperandVector &Operands);
1867 bool validateSOPLiteral(
const MCInst &Inst,
const OperandVector &Operands);
1868 bool validateConstantBusLimitations(
const MCInst &Inst,
const OperandVector &Operands);
1869 std::optional<unsigned> checkVOPDRegBankConstraints(
const MCInst &Inst,
1871 bool validateVOPD(
const MCInst &Inst,
const OperandVector &Operands);
1872 bool tryVOPD(
const MCInst &Inst);
1873 bool tryVOPD3(
const MCInst &Inst);
1874 bool tryAnotherVOPDEncoding(
const MCInst &Inst);
1876 bool validateIntClampSupported(
const MCInst &Inst);
1877 bool validateMIMGAtomicDMask(
const MCInst &Inst);
1878 bool validateMIMGGatherDMask(
const MCInst &Inst);
1879 bool validateMovrels(
const MCInst &Inst,
const OperandVector &Operands);
1880 bool validateMIMGDataSize(
const MCInst &Inst, SMLoc IDLoc);
1881 bool validateMIMGAddrSize(
const MCInst &Inst, SMLoc IDLoc);
1882 bool validateMIMGD16(
const MCInst &Inst);
1883 bool validateMIMGDim(
const MCInst &Inst,
const OperandVector &Operands);
1884 bool validateTensorR128(
const MCInst &Inst);
1885 bool validateMIMGMSAA(
const MCInst &Inst);
1886 bool validateOpSel(
const MCInst &Inst);
1887 bool validateTrue16OpSel(
const MCInst &Inst);
1888 bool validateNeg(
const MCInst &Inst, AMDGPU::OpName OpName);
1889 bool validateDPP(
const MCInst &Inst,
const OperandVector &Operands);
1890 bool validateVccOperand(MCRegister
Reg)
const;
1891 bool validateVOPLiteral(
const MCInst &Inst,
const OperandVector &Operands);
1892 bool validateMAIAccWrite(
const MCInst &Inst,
const OperandVector &Operands);
1893 bool validateMAISrc2(
const MCInst &Inst,
const OperandVector &Operands);
1894 bool validateMFMA(
const MCInst &Inst,
const OperandVector &Operands);
1895 bool validateAGPRLdSt(
const MCInst &Inst)
const;
1896 bool validateVGPRAlign(
const MCInst &Inst)
const;
1897 bool validateBLGP(
const MCInst &Inst,
const OperandVector &Operands);
1898 bool validateDS(
const MCInst &Inst,
const OperandVector &Operands);
1899 bool validateGWS(
const MCInst &Inst,
const OperandVector &Operands);
1900 bool validateDivScale(
const MCInst &Inst);
1901 bool validateWaitCnt(
const MCInst &Inst,
const OperandVector &Operands);
1902 bool validateCoherencyBits(
const MCInst &Inst,
const OperandVector &Operands,
1904 bool validateTHAndScopeBits(
const MCInst &Inst,
const OperandVector &Operands,
1905 const unsigned CPol);
1906 bool validateTFE(
const MCInst &Inst,
const OperandVector &Operands);
1907 bool validateLdsDirect(
const MCInst &Inst,
const OperandVector &Operands);
1908 bool validateWMMA(
const MCInst &Inst,
const OperandVector &Operands);
1909 unsigned getConstantBusLimit(
unsigned Opcode)
const;
1910 bool usesConstantBus(
const MCInst &Inst,
unsigned OpIdx);
1911 bool isInlineConstant(
const MCInst &Inst,
unsigned OpIdx)
const;
1912 MCRegister findImplicitSGPRReadInVOP(
const MCInst &Inst)
const;
1914 bool isSupportedMnemo(StringRef Mnemo,
1915 const FeatureBitset &FBS);
1916 bool isSupportedMnemo(StringRef Mnemo,
1917 const FeatureBitset &FBS,
1918 ArrayRef<unsigned> Variants);
1919 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1921 bool isId(
const StringRef Id)
const;
1922 bool isId(
const AsmToken &Token,
const StringRef Id)
const;
1924 StringRef getId()
const;
1925 bool trySkipId(
const StringRef Id);
1926 bool trySkipId(
const StringRef Pref,
const StringRef Id);
1930 bool parseString(StringRef &Val,
const StringRef ErrMsg =
"expected a string");
1931 bool parseId(StringRef &Val,
const StringRef ErrMsg =
"");
1937 StringRef getTokenStr()
const;
1938 AsmToken peekToken(
bool ShouldSkipSpace =
true);
1940 SMLoc getLoc()
const;
1944 void onBeginOfFile()
override;
1945 bool parsePrimaryExpr(
const MCExpr *&Res, SMLoc &EndLoc)
override;
1947 ParseStatus parseCustomOperand(
OperandVector &Operands,
unsigned MCK);
1957 bool parseSwizzleOperand(int64_t &
Op,
const unsigned MinVal,
1958 const unsigned MaxVal,
const Twine &ErrMsg,
1960 bool parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
1961 const unsigned MinVal,
1962 const unsigned MaxVal,
1963 const StringRef ErrMsg);
1965 bool parseSwizzleOffset(int64_t &
Imm);
1966 bool parseSwizzleMacro(int64_t &
Imm);
1967 bool parseSwizzleQuadPerm(int64_t &
Imm);
1968 bool parseSwizzleBitmaskPerm(int64_t &
Imm);
1969 bool parseSwizzleBroadcast(int64_t &
Imm);
1970 bool parseSwizzleSwap(int64_t &
Imm);
1971 bool parseSwizzleReverse(int64_t &
Imm);
1972 bool parseSwizzleFFT(int64_t &
Imm);
1973 bool parseSwizzleRotate(int64_t &
Imm);
1976 int64_t parseGPRIdxMacro();
// Thin wrapper: forwards Inst and Operands to cvtMubufImpl with the third
// argument fixed to false.
// NOTE(review): the third parameter of cvtMubufImpl is not visible here;
// presumably it is the "is atomic" switch -- confirm.
1978 void cvtMubuf(MCInst &Inst,
const OperandVector &Operands) { cvtMubufImpl(Inst, Operands,
false); }
// Thin wrapper: forwards Inst and Operands to cvtMubufImpl with the third
// argument fixed to true.
// NOTE(review): the third parameter of cvtMubufImpl is not visible here;
// presumably it is the "is atomic" switch -- confirm.
1979 void cvtMubufAtomic(MCInst &Inst,
const OperandVector &Operands) { cvtMubufImpl(Inst, Operands,
true); }
1984 OptionalImmIndexMap &OptionalIdx);
1985 void cvtScaledMFMA(MCInst &Inst,
const OperandVector &Operands);
1986 void cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands);
1989 void cvtSWMMAC(MCInst &Inst,
const OperandVector &Operands);
1992 void cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands,
1993 OptionalImmIndexMap &OptionalIdx);
1995 OptionalImmIndexMap &OptionalIdx);
1997 void cvtVOP3Interp(MCInst &Inst,
const OperandVector &Operands);
1998 void cvtVINTERP(MCInst &Inst,
const OperandVector &Operands);
1999 void cvtOpSelHelper(MCInst &Inst,
unsigned OpSel);
2001 bool parseDimId(
unsigned &Encoding);
2003 bool convertDppBoundCtrl(int64_t &BoundCtrl);
2006 bool isSupportedDPPCtrl(StringRef Ctrl,
const OperandVector &Operands);
2007 int64_t parseDPPCtrlSel(StringRef Ctrl);
2008 int64_t parseDPPCtrlPerm();
2009 void cvtDPP(MCInst &Inst,
const OperandVector &Operands,
bool IsDPP8 =
false);
2011 cvtDPP(Inst, Operands,
true);
2013 void cvtVOP3DPP(MCInst &Inst,
const OperandVector &Operands,
2014 bool IsDPP8 =
false);
2015 void cvtVOP3DPP8(MCInst &Inst,
const OperandVector &Operands) {
2016 cvtVOP3DPP(Inst, Operands,
true);
2019 ParseStatus parseSDWASel(
OperandVector &Operands, StringRef Prefix,
2020 AMDGPUOperand::ImmTy
Type);
2022 void cvtSdwaVOP1(MCInst &Inst,
const OperandVector &Operands);
2023 void cvtSdwaVOP2(MCInst &Inst,
const OperandVector &Operands);
2024 void cvtSdwaVOP2b(MCInst &Inst,
const OperandVector &Operands);
2025 void cvtSdwaVOP2e(MCInst &Inst,
const OperandVector &Operands);
2026 void cvtSdwaVOPC(MCInst &Inst,
const OperandVector &Operands);
// Scoped enumeration with underlying type unsigned and explicit values
// VOP1 = 0, VOP2 = 1, VOPC = 2.
// NOTE(review): presumably selects the basic instruction kind for the SDWA
// conversion routine declared next (its BasicInstType parameter) -- confirm.
2028 enum class SDWAInstType :
unsigned {
VOP1 = 0,
VOP2 = 1,
VOPC = 2 };
2031 SDWAInstType BasicInstType,
bool SkipDstVcc =
false,
2032 bool SkipSrcVcc =
false);
2141bool AMDGPUOperand::isInlinableImm(
MVT type)
const {
2151 if (!isImmTy(ImmTyNone)) {
2156 if (getModifiers().
Lit != LitModifier::None)
2166 if (type == MVT::f64 || type == MVT::i64) {
2168 AsmParser->hasInv2PiInlineImm());
2171 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64,
Imm.Val));
2190 APFloat::rmNearestTiesToEven, &Lost);
2197 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2199 AsmParser->hasInv2PiInlineImm());
2204 static_cast<int32_t
>(FPLiteral.bitcastToAPInt().getZExtValue()),
2205 AsmParser->hasInv2PiInlineImm());
2209 if (type == MVT::f64 || type == MVT::i64) {
2211 AsmParser->hasInv2PiInlineImm());
2220 static_cast<int16_t
>(
Literal.getLoBits(16).getSExtValue()),
2221 type, AsmParser->hasInv2PiInlineImm());
2225 static_cast<int32_t
>(
Literal.getLoBits(32).getZExtValue()),
2226 AsmParser->hasInv2PiInlineImm());
2229bool AMDGPUOperand::isLiteralImm(MVT type)
const {
2231 if (!isImmTy(ImmTyNone)) {
2236 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2241 if (type == MVT::f64 && hasFPModifiers()) {
2261 if (type == MVT::f64) {
2266 if (type == MVT::i64) {
2279 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2280 : (type == MVT::v2i16) ? MVT::f32
2281 : (type == MVT::v2f32) ? MVT::f32
2284 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64,
Imm.Val));
2288bool AMDGPUOperand::isRegClass(
unsigned RCID)
const {
2289 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(
getReg());
2292bool AMDGPUOperand::isVRegWithInputMods()
const {
2293 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2295 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2296 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2299template <
bool IsFake16>
2300bool AMDGPUOperand::isT16_Lo128VRegWithInputMods()
const {
2301 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2302 : AMDGPU::VGPR_16_Lo128RegClassID);
2305template <
bool IsFake16>
bool AMDGPUOperand::isT16VRegWithInputMods()
const {
2306 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2307 : AMDGPU::VGPR_16RegClassID);
2310bool AMDGPUOperand::isSDWAOperand(MVT type)
const {
2311 if (AsmParser->isVI())
2313 if (AsmParser->isGFX9Plus())
2314 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2318bool AMDGPUOperand::isSDWAFP16Operand()
const {
2319 return isSDWAOperand(MVT::f16);
2322bool AMDGPUOperand::isSDWAFP32Operand()
const {
2323 return isSDWAOperand(MVT::f32);
2326bool AMDGPUOperand::isSDWAInt16Operand()
const {
2327 return isSDWAOperand(MVT::i16);
2330bool AMDGPUOperand::isSDWAInt32Operand()
const {
2331 return isSDWAOperand(MVT::i32);
2334bool AMDGPUOperand::isBoolReg()
const {
2335 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2336 (AsmParser->isWave32() && isSCSrc_b32()));
2339uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val,
unsigned Size)
const
2341 assert(isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2344 const uint64_t FpSignMask = (1ULL << (
Size * 8 - 1));
2356void AMDGPUOperand::addImmOperands(MCInst &Inst,
unsigned N,
bool ApplyModifiers)
const {
2366 addLiteralImmOperand(Inst,
Imm.Val,
2368 isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
2370 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2375void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const {
2376 const auto& InstDesc = AsmParser->getMII()->get(Inst.
getOpcode());
2381 if (ApplyModifiers) {
2384 Val = applyInputFPModifiers(Val,
Size);
2388 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2390 bool CanUse64BitLiterals =
2391 AsmParser->has64BitLiterals() &&
2394 MCContext &Ctx = AsmParser->getContext();
2403 if (
Lit == LitModifier::None &&
2405 AsmParser->hasInv2PiInlineImm())) {
2413 bool HasMandatoryLiteral =
2416 if (
Literal.getLoBits(32) != 0 &&
2417 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2418 !HasMandatoryLiteral) {
2419 const_cast<AMDGPUAsmParser *
>(AsmParser)->
Warning(
2421 "Can't encode literal as exact 64-bit floating-point operand. "
2422 "Low 32-bits will be set to zero");
2423 Val &= 0xffffffff00000000u;
2429 if (CanUse64BitLiterals &&
Lit == LitModifier::None &&
2435 Lit = LitModifier::Lit64;
2436 }
else if (
Lit == LitModifier::Lit) {
2450 if (CanUse64BitLiterals &&
Lit == LitModifier::None &&
2452 Lit = LitModifier::Lit64;
2459 if (
Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2460 Literal == 0x3fc45f306725feed) {
2495 APFloat::rmNearestTiesToEven, &lost);
2499 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2506 if (
Lit != LitModifier::None) {
2536 if (
Lit == LitModifier::None &&
2546 if (!AsmParser->has64BitLiterals() ||
Lit == LitModifier::Lit)
2553 if (
Lit == LitModifier::None &&
2561 if (!AsmParser->has64BitLiterals()) {
2562 Val =
static_cast<uint64_t
>(Val) << 32;
2569 if (
Lit == LitModifier::Lit ||
2571 Val =
static_cast<uint64_t
>(Val) << 32;
2575 if (
Lit == LitModifier::Lit)
2601 if (
Lit != LitModifier::None) {
2609void AMDGPUOperand::addRegOperands(MCInst &Inst,
unsigned N)
const {
2614bool AMDGPUOperand::isInlineValue()
const {
2622void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2633 if (Is == IS_VGPR) {
2637 return AMDGPU::VGPR_32RegClassID;
2639 return AMDGPU::VReg_64RegClassID;
2641 return AMDGPU::VReg_96RegClassID;
2643 return AMDGPU::VReg_128RegClassID;
2645 return AMDGPU::VReg_160RegClassID;
2647 return AMDGPU::VReg_192RegClassID;
2649 return AMDGPU::VReg_224RegClassID;
2651 return AMDGPU::VReg_256RegClassID;
2653 return AMDGPU::VReg_288RegClassID;
2655 return AMDGPU::VReg_320RegClassID;
2657 return AMDGPU::VReg_352RegClassID;
2659 return AMDGPU::VReg_384RegClassID;
2661 return AMDGPU::VReg_512RegClassID;
2663 return AMDGPU::VReg_1024RegClassID;
2665 }
else if (Is == IS_TTMP) {
2669 return AMDGPU::TTMP_32RegClassID;
2671 return AMDGPU::TTMP_64RegClassID;
2673 return AMDGPU::TTMP_128RegClassID;
2675 return AMDGPU::TTMP_256RegClassID;
2677 return AMDGPU::TTMP_512RegClassID;
2679 }
else if (Is == IS_SGPR) {
2683 return AMDGPU::SGPR_32RegClassID;
2685 return AMDGPU::SGPR_64RegClassID;
2687 return AMDGPU::SGPR_96RegClassID;
2689 return AMDGPU::SGPR_128RegClassID;
2691 return AMDGPU::SGPR_160RegClassID;
2693 return AMDGPU::SGPR_192RegClassID;
2695 return AMDGPU::SGPR_224RegClassID;
2697 return AMDGPU::SGPR_256RegClassID;
2699 return AMDGPU::SGPR_288RegClassID;
2701 return AMDGPU::SGPR_320RegClassID;
2703 return AMDGPU::SGPR_352RegClassID;
2705 return AMDGPU::SGPR_384RegClassID;
2707 return AMDGPU::SGPR_512RegClassID;
2709 }
else if (Is == IS_AGPR) {
2713 return AMDGPU::AGPR_32RegClassID;
2715 return AMDGPU::AReg_64RegClassID;
2717 return AMDGPU::AReg_96RegClassID;
2719 return AMDGPU::AReg_128RegClassID;
2721 return AMDGPU::AReg_160RegClassID;
2723 return AMDGPU::AReg_192RegClassID;
2725 return AMDGPU::AReg_224RegClassID;
2727 return AMDGPU::AReg_256RegClassID;
2729 return AMDGPU::AReg_288RegClassID;
2731 return AMDGPU::AReg_320RegClassID;
2733 return AMDGPU::AReg_352RegClassID;
2735 return AMDGPU::AReg_384RegClassID;
2737 return AMDGPU::AReg_512RegClassID;
2739 return AMDGPU::AReg_1024RegClassID;
2747 .
Case(
"exec", AMDGPU::EXEC)
2748 .
Case(
"vcc", AMDGPU::VCC)
2749 .
Case(
"flat_scratch", AMDGPU::FLAT_SCR)
2750 .
Case(
"xnack_mask", AMDGPU::XNACK_MASK)
2751 .
Case(
"shared_base", AMDGPU::SRC_SHARED_BASE)
2752 .
Case(
"src_shared_base", AMDGPU::SRC_SHARED_BASE)
2753 .
Case(
"shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2754 .
Case(
"src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2755 .
Case(
"private_base", AMDGPU::SRC_PRIVATE_BASE)
2756 .
Case(
"src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2757 .
Case(
"private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2758 .
Case(
"src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2759 .
Case(
"src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2760 .
Case(
"src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2761 .
Case(
"pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2762 .
Case(
"src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2763 .
Case(
"lds_direct", AMDGPU::LDS_DIRECT)
2764 .
Case(
"src_lds_direct", AMDGPU::LDS_DIRECT)
2765 .
Case(
"m0", AMDGPU::M0)
2766 .
Case(
"vccz", AMDGPU::SRC_VCCZ)
2767 .
Case(
"src_vccz", AMDGPU::SRC_VCCZ)
2768 .
Case(
"execz", AMDGPU::SRC_EXECZ)
2769 .
Case(
"src_execz", AMDGPU::SRC_EXECZ)
2770 .
Case(
"scc", AMDGPU::SRC_SCC)
2771 .
Case(
"src_scc", AMDGPU::SRC_SCC)
2772 .
Case(
"tba", AMDGPU::TBA)
2773 .
Case(
"tma", AMDGPU::TMA)
2774 .
Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2775 .
Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2776 .
Case(
"xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2777 .
Case(
"xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2778 .
Case(
"vcc_lo", AMDGPU::VCC_LO)
2779 .
Case(
"vcc_hi", AMDGPU::VCC_HI)
2780 .
Case(
"exec_lo", AMDGPU::EXEC_LO)
2781 .
Case(
"exec_hi", AMDGPU::EXEC_HI)
2782 .
Case(
"tma_lo", AMDGPU::TMA_LO)
2783 .
Case(
"tma_hi", AMDGPU::TMA_HI)
2784 .
Case(
"tba_lo", AMDGPU::TBA_LO)
2785 .
Case(
"tba_hi", AMDGPU::TBA_HI)
2786 .
Case(
"pc", AMDGPU::PC_REG)
2787 .
Case(
"null", AMDGPU::SGPR_NULL)
2791bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2792 SMLoc &EndLoc,
bool RestoreOnFailure) {
2793 auto R = parseRegister();
2794 if (!R)
return true;
2796 RegNo =
R->getReg();
2797 StartLoc =
R->getStartLoc();
2798 EndLoc =
R->getEndLoc();
2802bool AMDGPUAsmParser::parseRegister(MCRegister &
Reg, SMLoc &StartLoc,
2804 return ParseRegister(
Reg, StartLoc, EndLoc,
false);
2807ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &
Reg, SMLoc &StartLoc,
2809 bool Result = ParseRegister(
Reg, StartLoc, EndLoc,
true);
2810 bool PendingErrors = getParser().hasPendingError();
2811 getParser().clearPendingErrors();
2819bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &
Reg,
unsigned &RegWidth,
2820 RegisterKind RegKind,
2822 RegisterKind RegKind1, SMLoc Loc) {
2824 if (RegKind == IS_SGPR) {
2825 unsigned RegIdx = (
Reg - AMDGPU::SGPR0) + RegWidth / 32;
2826 if ((RegIdx == 106 && Reg1 == AMDGPU::VCC_LO) ||
2827 (RegIdx == 107 && Reg1 == AMDGPU::VCC_HI)) {
2833 if (RegKind != RegKind1) {
2834 Error(Loc,
"registers in a list must be of the same kind");
2835 return MCRegister();
2840 if (
Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2845 if (
Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2846 Reg = AMDGPU::FLAT_SCR;
2850 if (
Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2851 Reg = AMDGPU::XNACK_MASK;
2855 if (
Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2860 if (
Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2865 if (
Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2870 Error(Loc,
"register does not fit in the list");
2876 if (Reg1 !=
Reg + RegWidth / 32) {
2877 Error(Loc,
"registers in a list must have consecutive indices");
2895 {{
"ttmp"}, IS_TTMP},
2901 return Kind == IS_VGPR ||
2909 if (Str.starts_with(
Reg.Name))
2915 return !Str.getAsInteger(10, Num);
2919AMDGPUAsmParser::isRegister(
const AsmToken &Token,
2920 const AsmToken &NextToken)
const {
2935 StringRef RegSuffix = Str.substr(
RegName.size());
2936 if (!RegSuffix.
empty()) {
2954AMDGPUAsmParser::isRegister()
2956 return isRegister(
getToken(), peekToken());
2959MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
unsigned RegNum,
2960 unsigned SubReg,
unsigned RegWidth,
2964 unsigned AlignSize = 1;
2965 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2971 if (RegNum % AlignSize != 0) {
2972 Error(Loc,
"invalid register alignment");
2973 return MCRegister();
2976 unsigned RegIdx = RegNum / AlignSize;
2979 Error(Loc,
"invalid or unsupported register size");
2980 return MCRegister();
2984 const MCRegisterClass RC =
TRI->getRegClass(RCID);
2985 if (RegIdx >= RC.
getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2986 Error(Loc,
"register index is out of range");
2987 return AMDGPU::NoRegister;
2990 if (RegKind == IS_VGPR && !
isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
2991 Error(Loc,
"register index is out of range");
2992 return MCRegister();
3008bool AMDGPUAsmParser::ParseRegRange(
unsigned &Num,
unsigned &RegWidth,
3010 int64_t RegLo, RegHi;
3014 SMLoc FirstIdxLoc = getLoc();
3021 SecondIdxLoc = getLoc();
3032 Error(FirstIdxLoc,
"invalid register index");
3037 Error(SecondIdxLoc,
"invalid register index");
3041 if (RegLo > RegHi) {
3042 Error(FirstIdxLoc,
"first register index should not exceed second index");
3046 if (RegHi == RegLo) {
3047 StringRef RegSuffix = getTokenStr();
3048 if (RegSuffix ==
".l") {
3049 SubReg = AMDGPU::lo16;
3051 }
else if (RegSuffix ==
".h") {
3052 SubReg = AMDGPU::hi16;
3057 Num =
static_cast<unsigned>(RegLo);
3058 RegWidth = 32 * ((RegHi - RegLo) + 1);
3063MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3066 SmallVectorImpl<AsmToken> &Tokens) {
3072 RegKind = IS_SPECIAL;
3079MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3082 SmallVectorImpl<AsmToken> &Tokens) {
3084 StringRef
RegName = getTokenStr();
3085 auto Loc = getLoc();
3089 Error(Loc,
"invalid register name");
3090 return MCRegister();
3098 unsigned SubReg = NoSubRegister;
3099 bool IsRange =
false;
3100 if (!RegSuffix.
empty()) {
3102 SubReg = AMDGPU::lo16;
3104 SubReg = AMDGPU::hi16;
3108 Error(Loc,
"invalid register index");
3109 return MCRegister();
3115 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3116 return MCRegister();
3120 MCRegister
Reg = getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3121 const MCRegisterInfo &
TRI = *
getContext().getRegisterInfo();
3122 if (RegKind == IS_SGPR && IsRange
3123 ? (
TRI.isSubRegister(
Reg, VCC_LO) ||
TRI.isSubRegister(
Reg, VCC_HI))
3124 : (
Reg == VCC_LO ||
Reg == VCC_HI)) {
3125 Error(Loc,
"register index is out of range");
3126 return MCRegister();
3132MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3133 unsigned &RegNum,
unsigned &RegWidth,
3134 SmallVectorImpl<AsmToken> &Tokens) {
3136 auto ListLoc = getLoc();
3139 "expected a register or a list of registers")) {
3140 return MCRegister();
3145 auto Loc = getLoc();
3146 if (!ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth))
3147 return MCRegister();
3148 if (RegWidth != 32) {
3149 Error(Loc,
"expected a single 32-bit register");
3150 return MCRegister();
3154 RegisterKind NextRegKind;
3156 unsigned NextRegNum, NextRegWidth;
3159 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3160 NextRegNum, NextRegWidth,
3162 return MCRegister();
3164 if (NextRegWidth != 32) {
3165 Error(Loc,
"expected a single 32-bit register");
3166 return MCRegister();
3168 if (!AddNextRegisterToList(
Reg, RegWidth, RegKind, NextReg, NextRegKind,
3170 return MCRegister();
3174 "expected a comma or a closing square bracket")) {
3175 return MCRegister();
3179 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3184bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3185 MCRegister &
Reg,
unsigned &RegNum,
3187 SmallVectorImpl<AsmToken> &Tokens) {
3188 auto Loc = getLoc();
3192 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3194 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3196 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3201 assert(Parser.hasPendingError());
3205 if (!subtargetHasRegister(*
TRI,
Reg)) {
3206 if (
Reg == AMDGPU::SGPR_NULL) {
3207 Error(Loc,
"'null' operand is not supported on this GPU");
3210 " register not available on this GPU");
3218bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3219 MCRegister &
Reg,
unsigned &RegNum,
3221 bool RestoreOnFailure ) {
3225 if (ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth, Tokens)) {
3226 if (RestoreOnFailure) {
3227 while (!Tokens.
empty()) {
// Produces the name of a GPR-count symbol for RegKind. The visible returns
// yield ".amdgcn.next_free_vgpr", ".amdgcn.next_free_sgpr", or std::nullopt.
// NOTE(review): the branches that choose between these returns are not
// visible in this excerpt; presumably VGPR and SGPR kinds map to the
// like-named symbol and every other kind gets std::nullopt - confirm.
3236std::optional<StringRef>
3237AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3240 return StringRef(
".amdgcn.next_free_vgpr");
3242 return StringRef(
".amdgcn.next_free_sgpr");
3244 return std::nullopt;
// Starts from the symbol name returned by getGprCountSymbolName(RegKind).
// Precondition (asserted): RegKind must be a kind for which that lookup
// returns a name rather than std::nullopt.
3248void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3249 auto SymbolName = getGprCountSymbolName(RegKind);
3250 assert(SymbolName &&
"initializing invalid register kind");
// Compares the register span described by (DwordRegIndex, RegWidth) against
// the current value of the GPR-count symbol for RegKind.
// NOTE(review): the statements that read OldCount and write the symbol are
// not visible in this excerpt; only the diagnostics and the comparison are.
3256bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3257 unsigned DwordRegIndex,
3258 unsigned RegWidth) {
3263 auto SymbolName = getGprCountSymbolName(RegKind);
// Highest 32-bit index touched by the register: RegWidth is divided by 32
// rounding up, so a width that is not a multiple of 32 still counts as a
// whole extra unit.
3268 int64_t NewMax = DwordRegIndex +
divideCeil(RegWidth, 32) - 1;
// The diagnostic is emitted and the negation of Error()'s result is returned.
3272 return !
Error(getLoc(),
3273 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3277 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
// Branch taken when the recorded count does not yet exceed NewMax.
3279 if (OldCount <= NewMax)
3285std::unique_ptr<AMDGPUOperand>
3286AMDGPUAsmParser::parseRegister(
bool RestoreOnFailure) {
3288 SMLoc StartLoc = Tok.getLoc();
3289 SMLoc EndLoc = Tok.getEndLoc();
3290 RegisterKind RegKind;
3292 unsigned RegNum, RegWidth;
3294 if (!ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth)) {
3298 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3301 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3302 return AMDGPUOperand::CreateReg(
this,
Reg, StartLoc, EndLoc);
3305ParseStatus AMDGPUAsmParser::parseImm(
OperandVector &Operands,
3309 if (isRegister() || isModifier())
3312 if (
Lit == LitModifier::None) {
3313 if (trySkipId(
"lit"))
3314 Lit = LitModifier::Lit;
3315 else if (trySkipId(
"lit64"))
3316 Lit = LitModifier::Lit64;
3318 if (
Lit != LitModifier::None) {
3321 ParseStatus S = parseImm(Operands, HasSP3AbsModifier,
Lit);
3330 const auto& NextTok = peekToken();
3333 bool Negate =
false;
3341 AMDGPUOperand::Modifiers Mods;
3349 StringRef Num = getTokenStr();
3352 APFloat RealVal(APFloat::IEEEdouble());
3353 auto roundMode = APFloat::rmNearestTiesToEven;
3354 if (
errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3357 RealVal.changeSign();
3360 AMDGPUOperand::CreateImm(
this, RealVal.bitcastToAPInt().getZExtValue(), S,
3361 AMDGPUOperand::ImmTyNone,
true));
3362 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3363 Op.setModifiers(Mods);
3372 if (HasSP3AbsModifier) {
3381 if (getParser().parsePrimaryExpr(Expr, EndLoc,
nullptr))
3384 if (Parser.parseExpression(Expr))
3388 if (Expr->evaluateAsAbsolute(IntVal)) {
3390 return Error(S,
"literal value out of range");
3391 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
3392 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3393 Op.setModifiers(Mods);
3395 if (
Lit != LitModifier::None)
3397 Operands.
push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
3406ParseStatus AMDGPUAsmParser::parseReg(
OperandVector &Operands) {
3410 if (
auto R = parseRegister()) {
3418ParseStatus AMDGPUAsmParser::parseRegOrImm(
OperandVector &Operands,
3420 ParseStatus Res = parseReg(Operands);
3425 return parseImm(Operands, HasSP3AbsMod,
Lit);
3429AMDGPUAsmParser::isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3432 return str ==
"abs" || str ==
"neg" || str ==
"sext";
3438AMDGPUAsmParser::isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const {
// True when Token/NextToken are recognised by isNamedOperandModifier(), or
// when Token itself is a vertical bar (AsmToken::Pipe).
3443AMDGPUAsmParser::isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3444 return isNamedOperandModifier(Token, NextToken) || Token.
is(
AsmToken::Pipe);
// True when the token pair is accepted by either isRegister() or
// isOperandModifier(); isRegister() is tried first.
3448AMDGPUAsmParser::isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3449 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3466AMDGPUAsmParser::isModifier() {
3469 AsmToken NextToken[2];
3470 peekTokens(NextToken);
3472 return isOperandModifier(Tok, NextToken[0]) ||
3473 (Tok.
is(
AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3474 isOpcodeModifierWithVal(Tok, NextToken[0]);
3500AMDGPUAsmParser::parseSP3NegModifier() {
3502 AsmToken NextToken[2];
3503 peekTokens(NextToken);
3506 (isRegister(NextToken[0], NextToken[1]) ||
3508 isId(NextToken[0],
"abs"))) {
3517AMDGPUAsmParser::parseRegOrImmWithFPInputMods(
OperandVector &Operands,
3525 return Error(getLoc(),
"invalid syntax, expected 'neg' modifier");
3527 SP3Neg = parseSP3NegModifier();
3530 Neg = trySkipId(
"neg");
3532 return Error(Loc,
"expected register or immediate");
3536 Abs = trySkipId(
"abs");
3541 if (trySkipId(
"lit")) {
3542 Lit = LitModifier::Lit;
3545 }
else if (trySkipId(
"lit64")) {
3546 Lit = LitModifier::Lit64;
3549 if (!has64BitLiterals())
3550 return Error(Loc,
"lit64 is not supported on this GPU");
3556 return Error(Loc,
"expected register or immediate");
3560 Res = parseRegOrImm(Operands, SP3Abs,
Lit);
3562 Res = parseReg(Operands);
3565 return (SP3Neg || Neg || SP3Abs || Abs ||
Lit != LitModifier::None)
3569 if (
Lit != LitModifier::None && !Operands.
back()->isImm())
3570 Error(Loc,
"expected immediate with lit modifier");
3572 if (SP3Abs && !skipToken(
AsmToken::Pipe,
"expected vertical bar"))
3578 if (
Lit != LitModifier::None &&
3582 AMDGPUOperand::Modifiers Mods;
3583 Mods.Abs = Abs || SP3Abs;
3584 Mods.Neg = Neg || SP3Neg;
3587 if (Mods.hasFPModifiers() ||
Lit != LitModifier::None) {
3588 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3590 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3591 Op.setModifiers(Mods);
3597AMDGPUAsmParser::parseRegOrImmWithIntInputMods(
OperandVector &Operands,
3599 bool Sext = trySkipId(
"sext");
3600 if (Sext && !skipToken(
AsmToken::LParen,
"expected left paren after sext"))
3605 Res = parseRegOrImm(Operands);
3607 Res = parseReg(Operands);
3615 AMDGPUOperand::Modifiers Mods;
3618 if (Mods.hasIntModifiers()) {
3619 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3621 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3622 Op.setModifiers(Mods);
// Thin wrapper: forwards to parseRegOrImmWithFPInputMods() with `false` as
// the second argument and returns its status unchanged.
// NOTE(review): the second parameter's name is not visible in this excerpt;
// presumably `false` restricts parsing to registers only - confirm.
3628ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(
OperandVector &Operands) {
3629 return parseRegOrImmWithFPInputMods(Operands,
false);
// Thin wrapper: forwards to parseRegOrImmWithIntInputMods() with `false` as
// the second argument and returns its status unchanged.
// NOTE(review): the second parameter's name is not visible in this excerpt;
// presumably `false` restricts parsing to registers only - confirm.
3632ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(
OperandVector &Operands) {
3633 return parseRegOrImmWithIntInputMods(Operands,
false);
3636ParseStatus AMDGPUAsmParser::parseVReg32OrOff(
OperandVector &Operands) {
3637 auto Loc = getLoc();
3638 if (trySkipId(
"off")) {
3639 Operands.
push_back(AMDGPUOperand::CreateImm(
this, 0, Loc,
3640 AMDGPUOperand::ImmTyOff,
false));
3647 std::unique_ptr<AMDGPUOperand>
Reg = parseRegister();
3656unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3663 return Match_InvalidOperand;
3665 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3666 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3669 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::dst_sel);
3671 if (!
Op.isImm() ||
Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3672 return Match_InvalidOperand;
3680 if (tryAnotherVOPDEncoding(Inst))
3681 return Match_InvalidOperand;
3683 return Match_Success;
3687 static const unsigned Variants[] = {
3697ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants()
const {
3698 if (isForcedDPP() && isForcedVOP3()) {
3702 if (getForcedEncodingSize() == 32) {
3707 if (isForcedVOP3()) {
3712 if (isForcedSDWA()) {
3718 if (isForcedDPP()) {
3726StringRef AMDGPUAsmParser::getMatchedVariantName()
const {
3727 if (isForcedDPP() && isForcedVOP3())
3730 if (getForcedEncodingSize() == 32)
3746AMDGPUAsmParser::findImplicitSGPRReadInVOP(
const MCInst &Inst)
const {
3750 case AMDGPU::FLAT_SCR:
3752 case AMDGPU::VCC_LO:
3753 case AMDGPU::VCC_HI:
3760 return MCRegister();
3767bool AMDGPUAsmParser::isInlineConstant(
const MCInst &Inst,
3768 unsigned OpIdx)
const {
3825unsigned AMDGPUAsmParser::getConstantBusLimit(
unsigned Opcode)
const {
3831 case AMDGPU::V_LSHLREV_B64_e64:
3832 case AMDGPU::V_LSHLREV_B64_gfx10:
3833 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3834 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3835 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3836 case AMDGPU::V_LSHRREV_B64_e64:
3837 case AMDGPU::V_LSHRREV_B64_gfx10:
3838 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3839 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3840 case AMDGPU::V_ASHRREV_I64_e64:
3841 case AMDGPU::V_ASHRREV_I64_gfx10:
3842 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3843 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3844 case AMDGPU::V_LSHL_B64_e64:
3845 case AMDGPU::V_LSHR_B64_e64:
3846 case AMDGPU::V_ASHR_I64_e64:
3859 bool AddMandatoryLiterals =
false) {
3862 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3866 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3868 return {getNamedOperandIdx(Opcode, OpName::src0X),
3869 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3870 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3871 getNamedOperandIdx(Opcode, OpName::src0Y),
3872 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3873 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3878 return {getNamedOperandIdx(Opcode, OpName::src0),
3879 getNamedOperandIdx(Opcode, OpName::src1),
3880 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3883bool AMDGPUAsmParser::usesConstantBus(
const MCInst &Inst,
unsigned OpIdx) {
3886 return !isInlineConstant(Inst,
OpIdx);
3893 return isSGPR(PReg,
TRI) && PReg != SGPR_NULL;
3904 const unsigned Opcode = Inst.
getOpcode();
3905 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3908 if (!LaneSelOp.
isReg())
3911 return LaneSelReg ==
M0 || LaneSelReg == M0_gfxpre11;
3914bool AMDGPUAsmParser::validateConstantBusLimitations(
3916 const unsigned Opcode = Inst.
getOpcode();
3917 const MCInstrDesc &
Desc = MII.
get(Opcode);
3918 MCRegister LastSGPR;
3919 unsigned ConstantBusUseCount = 0;
3920 unsigned NumLiterals = 0;
3921 unsigned LiteralSize;
3923 if (!(
Desc.TSFlags &
3938 SmallDenseSet<MCRegister> SGPRsUsed;
3939 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3941 SGPRsUsed.
insert(SGPRUsed);
3942 ++ConstantBusUseCount;
3947 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3949 for (
int OpIdx : OpIndices) {
3954 if (usesConstantBus(Inst,
OpIdx)) {
3963 if (SGPRsUsed.
insert(LastSGPR).second) {
3964 ++ConstantBusUseCount;
3984 if (NumLiterals == 0) {
3987 }
else if (LiteralSize !=
Size) {
3993 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3995 "invalid operand (violates constant bus restrictions)");
4002std::optional<unsigned>
4003AMDGPUAsmParser::checkVOPDRegBankConstraints(
const MCInst &Inst,
bool AsVOPD3) {
4005 const unsigned Opcode = Inst.
getOpcode();
4011 auto getVRegIdx = [&](unsigned,
unsigned OperandIdx) {
4012 const MCOperand &Opr = Inst.
getOperand(OperandIdx);
4021 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
4022 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
4023 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
4024 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
4025 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
4026 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
4030 for (
auto OpName : {OpName::src0X, OpName::src0Y}) {
4031 int I = getNamedOperandIdx(Opcode, OpName);
4035 int64_t
Imm =
Op.getImm();
4041 for (
auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4042 OpName::vsrc2Y, OpName::imm}) {
4043 int I = getNamedOperandIdx(Opcode, OpName);
4053 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4054 getVRegIdx, *
TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4056 return InvalidCompOprIdx;
4059bool AMDGPUAsmParser::validateVOPD(
const MCInst &Inst,
4066 for (
const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4067 AMDGPUOperand &
Op = (AMDGPUOperand &)*Operand;
4068 if ((
Op.isRegKind() ||
Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4070 Error(
Op.getStartLoc(),
"ABS not allowed in VOPD3 instructions");
4074 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4075 if (!InvalidCompOprIdx.has_value())
4078 auto CompOprIdx = *InvalidCompOprIdx;
4081 std::max(InstInfo[
VOPD::X].getIndexInParsedOperands(CompOprIdx),
4082 InstInfo[
VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4083 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4085 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4086 if (CompOprIdx == VOPD::Component::DST) {
4088 Error(Loc,
"dst registers must be distinct");
4090 Error(Loc,
"one dst register must be even and the other odd");
4092 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4093 Error(Loc, Twine(
"src") + Twine(CompSrcIdx) +
4094 " operands must use different VGPR banks");
4102bool AMDGPUAsmParser::tryVOPD3(
const MCInst &Inst) {
4104 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst,
false);
4105 if (!InvalidCompOprIdx.has_value())
4109 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst,
true);
4110 if (InvalidCompOprIdx.has_value()) {
4115 if (*InvalidCompOprIdx == VOPD::Component::DST)
4128bool AMDGPUAsmParser::tryVOPD(
const MCInst &Inst) {
4129 const unsigned Opcode = Inst.
getOpcode();
4144 for (
auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4145 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4146 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4147 int I = getNamedOperandIdx(Opcode, OpName);
4154 return !tryVOPD3(Inst);
4159bool AMDGPUAsmParser::tryAnotherVOPDEncoding(
const MCInst &Inst) {
4160 const unsigned Opcode = Inst.
getOpcode();
4165 return tryVOPD(Inst);
4166 return tryVOPD3(Inst);
4169bool AMDGPUAsmParser::validateIntClampSupported(
const MCInst &Inst) {
4175 int ClampIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::clamp);
4186bool AMDGPUAsmParser::validateMIMGDataSize(
const MCInst &Inst,
SMLoc IDLoc) {
4194 int VDataIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
4195 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4196 int TFEIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::tfe);
4204 unsigned VDataSize = getRegOperandSize(
Desc, VDataIdx);
4205 unsigned TFESize = (TFEIdx != -1 && Inst.
getOperand(TFEIdx).
getImm()) ? 1 : 0;
4210 bool IsPackedD16 =
false;
4214 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4215 IsPackedD16 = D16Idx >= 0;
4220 if ((VDataSize / 4) ==
DataSize + TFESize)
4225 Modifiers = IsPackedD16 ?
"dmask and d16" :
"dmask";
4227 Modifiers = IsPackedD16 ?
"dmask, d16 and tfe" :
"dmask and tfe";
4229 Error(IDLoc,
Twine(
"image data size does not match ") + Modifiers);
4233bool AMDGPUAsmParser::validateMIMGAddrSize(
const MCInst &Inst, SMLoc IDLoc) {
4242 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4244 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
4246 ? AMDGPU::OpName::srsrc
4247 : AMDGPU::OpName::rsrc;
4248 int SrsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RSrcOpName);
4249 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4250 int A16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::a16);
4254 assert(SrsrcIdx > VAddr0Idx);
4257 if (BaseOpcode->
BVH) {
4258 if (IsA16 == BaseOpcode->
A16)
4260 Error(IDLoc,
"image address size does not match a16");
4266 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4267 unsigned ActualAddrSize =
4268 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(
Desc, VAddr0Idx) / 4;
4270 unsigned ExpectedAddrSize =
4274 if (hasPartialNSAEncoding() &&
4277 int VAddrLastIdx = SrsrcIdx - 1;
4278 unsigned VAddrLastSize = getRegOperandSize(
Desc, VAddrLastIdx) / 4;
4280 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4283 if (ExpectedAddrSize > 12)
4284 ExpectedAddrSize = 16;
4289 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4293 if (ActualAddrSize == ExpectedAddrSize)
4296 Error(IDLoc,
"image address size does not match dim and a16");
4300bool AMDGPUAsmParser::validateMIMGAtomicDMask(
const MCInst &Inst) {
4307 if (!
Desc.mayLoad() || !
Desc.mayStore())
4310 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4317 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4320bool AMDGPUAsmParser::validateMIMGGatherDMask(
const MCInst &Inst) {
4328 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4336 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4339bool AMDGPUAsmParser::validateMIMGDim(
const MCInst &Inst,
4354 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
4355 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4362bool AMDGPUAsmParser::validateMIMGMSAA(
const MCInst &Inst) {
4370 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4373 if (!BaseOpcode->
MSAA)
4376 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4382 return DimInfo->
MSAA;
4388 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4389 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4390 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4400bool AMDGPUAsmParser::validateMovrels(
const MCInst &Inst,
4409 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4412 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4420 Error(getOperandLoc(Operands, Src0Idx),
"source operand must be a VGPR");
4424bool AMDGPUAsmParser::validateMAIAccWrite(
const MCInst &Inst,
4429 if (
Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4432 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4435 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4442 Error(getOperandLoc(Operands, Src0Idx),
4443 "source operand must be either a VGPR or an inline constant");
4450bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
4453 const MCInstrDesc &
Desc = MII.
get(Opcode);
4456 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4459 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4463 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
4464 Error(getOperandLoc(Operands, Src2Idx),
4465 "inline constants are not allowed for this operand");
4472bool AMDGPUAsmParser::validateMFMA(
const MCInst &Inst,
4480 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
4481 if (BlgpIdx != -1) {
4482 if (
const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(
Opc)) {
4483 int CbszIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
4493 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4494 Error(getOperandLoc(Operands, Src0Idx),
4495 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4500 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
4501 Error(getOperandLoc(Operands, Src1Idx),
4502 "wrong register tuple size for blgp value " + Twine(BLGP));
4510 const int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
4514 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
4518 MCRegister Src2Reg = Src2.
getReg();
4520 if (Src2Reg == DstReg)
4525 .getSizeInBits() <= 128)
4528 if (
TRI->regsOverlap(Src2Reg, DstReg)) {
4529 Error(getOperandLoc(Operands, Src2Idx),
4530 "source 2 operand must not partially overlap with dst");
// Validation hook for the V_DIV_SCALE_{F32,F64} opcodes listed below; it
// iterates over a list of source-modifier operand names for those opcodes.
// NOTE(review): the switch header, the loop body and the return statements
// are not visible in this excerpt.
4537bool AMDGPUAsmParser::validateDivScale(
const MCInst &Inst) {
4541 case V_DIV_SCALE_F32_gfx6_gfx7:
4542 case V_DIV_SCALE_F32_vi:
4543 case V_DIV_SCALE_F32_gfx10:
4544 case V_DIV_SCALE_F64_gfx6_gfx7:
4545 case V_DIV_SCALE_F64_vi:
4546 case V_DIV_SCALE_F64_gfx10:
// NOTE(review): src2_modifiers appears twice in this list and src1_modifiers
// never appears, so whatever the loop checks is applied to src2 twice and to
// src1 not at all. This looks like a typo for src0/src1/src2 - confirm the
// intended operand set before changing it.
4552 for (
auto Name : {AMDGPU::OpName::src0_modifiers,
4553 AMDGPU::OpName::src2_modifiers,
4554 AMDGPU::OpName::src2_modifiers}) {
4565bool AMDGPUAsmParser::validateMIMGD16(
const MCInst &Inst) {
4573 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4582bool AMDGPUAsmParser::validateTensorR128(
const MCInst &Inst) {
4589 int R128Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::r128);
4597 case AMDGPU::V_SUBREV_F32_e32:
4598 case AMDGPU::V_SUBREV_F32_e64:
4599 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4600 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4601 case AMDGPU::V_SUBREV_F32_e32_vi:
4602 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4603 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4604 case AMDGPU::V_SUBREV_F32_e64_vi:
4606 case AMDGPU::V_SUBREV_CO_U32_e32:
4607 case AMDGPU::V_SUBREV_CO_U32_e64:
4608 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4609 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4611 case AMDGPU::V_SUBBREV_U32_e32:
4612 case AMDGPU::V_SUBBREV_U32_e64:
4613 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4614 case AMDGPU::V_SUBBREV_U32_e32_vi:
4615 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4616 case AMDGPU::V_SUBBREV_U32_e64_vi:
4618 case AMDGPU::V_SUBREV_U32_e32:
4619 case AMDGPU::V_SUBREV_U32_e64:
4620 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4621 case AMDGPU::V_SUBREV_U32_e32_vi:
4622 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4623 case AMDGPU::V_SUBREV_U32_e64_vi:
4625 case AMDGPU::V_SUBREV_F16_e32:
4626 case AMDGPU::V_SUBREV_F16_e64:
4627 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4628 case AMDGPU::V_SUBREV_F16_e32_vi:
4629 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4630 case AMDGPU::V_SUBREV_F16_e64_vi:
4632 case AMDGPU::V_SUBREV_U16_e32:
4633 case AMDGPU::V_SUBREV_U16_e64:
4634 case AMDGPU::V_SUBREV_U16_e32_vi:
4635 case AMDGPU::V_SUBREV_U16_e64_vi:
4637 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4638 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4639 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4641 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4642 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4644 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4645 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4647 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4648 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4650 case AMDGPU::V_LSHRREV_B32_e32:
4651 case AMDGPU::V_LSHRREV_B32_e64:
4652 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4653 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4654 case AMDGPU::V_LSHRREV_B32_e32_vi:
4655 case AMDGPU::V_LSHRREV_B32_e64_vi:
4656 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4657 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4659 case AMDGPU::V_ASHRREV_I32_e32:
4660 case AMDGPU::V_ASHRREV_I32_e64:
4661 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4662 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4663 case AMDGPU::V_ASHRREV_I32_e32_vi:
4664 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4665 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4666 case AMDGPU::V_ASHRREV_I32_e64_vi:
4668 case AMDGPU::V_LSHLREV_B32_e32:
4669 case AMDGPU::V_LSHLREV_B32_e64:
4670 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4671 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4672 case AMDGPU::V_LSHLREV_B32_e32_vi:
4673 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4674 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4675 case AMDGPU::V_LSHLREV_B32_e64_vi:
4677 case AMDGPU::V_LSHLREV_B16_e32:
4678 case AMDGPU::V_LSHLREV_B16_e64:
4679 case AMDGPU::V_LSHLREV_B16_e32_vi:
4680 case AMDGPU::V_LSHLREV_B16_e64_vi:
4681 case AMDGPU::V_LSHLREV_B16_gfx10:
4683 case AMDGPU::V_LSHRREV_B16_e32:
4684 case AMDGPU::V_LSHRREV_B16_e64:
4685 case AMDGPU::V_LSHRREV_B16_e32_vi:
4686 case AMDGPU::V_LSHRREV_B16_e64_vi:
4687 case AMDGPU::V_LSHRREV_B16_gfx10:
4689 case AMDGPU::V_ASHRREV_I16_e32:
4690 case AMDGPU::V_ASHRREV_I16_e64:
4691 case AMDGPU::V_ASHRREV_I16_e32_vi:
4692 case AMDGPU::V_ASHRREV_I16_e64_vi:
4693 case AMDGPU::V_ASHRREV_I16_gfx10:
4695 case AMDGPU::V_LSHLREV_B64_e64:
4696 case AMDGPU::V_LSHLREV_B64_gfx10:
4697 case AMDGPU::V_LSHLREV_B64_vi:
4699 case AMDGPU::V_LSHRREV_B64_e64:
4700 case AMDGPU::V_LSHRREV_B64_gfx10:
4701 case AMDGPU::V_LSHRREV_B64_vi:
4703 case AMDGPU::V_ASHRREV_I64_e64:
4704 case AMDGPU::V_ASHRREV_I64_gfx10:
4705 case AMDGPU::V_ASHRREV_I64_vi:
4707 case AMDGPU::V_PK_LSHLREV_B16:
4708 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4709 case AMDGPU::V_PK_LSHLREV_B16_vi:
4711 case AMDGPU::V_PK_LSHRREV_B16:
4712 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4713 case AMDGPU::V_PK_LSHRREV_B16_vi:
4714 case AMDGPU::V_PK_ASHRREV_I16:
4715 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4716 case AMDGPU::V_PK_ASHRREV_I16_vi:
4723bool AMDGPUAsmParser::validateLdsDirect(
const MCInst &Inst,
4725 using namespace SIInstrFlags;
4726 const unsigned Opcode = Inst.
getOpcode();
4727 const MCInstrDesc &
Desc = MII.
get(Opcode);
4732 if ((
Desc.TSFlags & Enc) == 0)
4735 for (
auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4736 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4740 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4743 Error(getOperandLoc(Operands, SrcIdx),
4744 "lds_direct is not supported on this GPU");
4749 Error(getOperandLoc(Operands, SrcIdx),
4750 "lds_direct cannot be used with this instruction");
4754 if (SrcName != OpName::src0) {
4755 Error(getOperandLoc(Operands, SrcIdx),
4756 "lds_direct may be used as src0 only");
// Walks the parsed operands from index 1 onward and returns the start
// location of the first one for which isFlatOffset() holds.
// NOTE(review): index 0 is skipped, presumably because it holds the mnemonic
// token - confirm. The fallback return for "not found" is not visible here.
4765SMLoc AMDGPUAsmParser::getFlatOffsetLoc(
const OperandVector &Operands)
const {
4766 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
4767 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4768 if (
Op.isFlatOffset())
4769 return Op.getStartLoc();
4774bool AMDGPUAsmParser::validateOffset(
const MCInst &Inst,
4777 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4783 return validateFlatOffset(Inst, Operands);
4786 return validateSMEMOffset(Inst, Operands);
4792 const unsigned OffsetSize = 24;
4793 if (!
isUIntN(OffsetSize - 1,
Op.getImm())) {
4794 Error(getFlatOffsetLoc(Operands),
4795 Twine(
"expected a ") + Twine(OffsetSize - 1) +
4796 "-bit unsigned offset for buffer ops");
4800 const unsigned OffsetSize = 16;
4801 if (!
isUIntN(OffsetSize,
Op.getImm())) {
4802 Error(getFlatOffsetLoc(Operands),
4803 Twine(
"expected a ") + Twine(OffsetSize) +
"-bit unsigned offset");
// Checks the immediate of the `offset` operand of a flat-style instruction
// and reports a diagnostic at the offset operand's location when it is not
// acceptable.
// NOTE(review): the definitions of Opcode, Op, OffsetSize and the right-hand
// side of AllowNegative are not visible in this excerpt.
4810bool AMDGPUAsmParser::validateFlatOffset(
const MCInst &Inst,
4817 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
// Without flat-offset support only a zero offset is accepted.
4821 if (!hasFlatOffsets() &&
Op.getImm() != 0) {
4822 Error(getFlatOffsetLoc(Operands),
4823 "flat offset modifier is not supported on this GPU");
4830 bool AllowNegative =
// The value must fit in OffsetSize signed bits; when negatives are not
// allowed it must additionally be non-negative.
4833 if (!
isIntN(OffsetSize,
Op.getImm()) || (!AllowNegative &&
Op.getImm() < 0)) {
// A non-negative value that fits in OffsetSize signed bits has at most
// OffsetSize - 1 magnitude bits, hence the smaller width in the unsigned
// wording of the message.
4834 Error(getFlatOffsetLoc(Operands),
4835 Twine(
"expected a ") +
4836 (AllowNegative ? Twine(OffsetSize) +
"-bit signed offset"
4837 : Twine(OffsetSize - 1) +
"-bit unsigned offset"));
// Walks the parsed operands from index 2 onward and returns the start
// location of the first one that is an SMEM offset or an SMEM offset
// modifier (isSMEMOffset() || isSMEMOffsetMod()).
// NOTE(review): why the scan starts at 2 rather than 1 is not explained by
// the visible lines; the fallback return for "not found" is not visible
// either.
4844SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(
const OperandVector &Operands)
const {
4846 for (
unsigned i = 2, e = Operands.
size(); i != e; ++i) {
4847 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4848 if (
Op.isSMEMOffset() ||
Op.isSMEMOffsetMod())
4849 return Op.getStartLoc();
4854bool AMDGPUAsmParser::validateSMEMOffset(
const MCInst &Inst,
4864 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4878 Error(getSMEMOffsetLoc(Operands),
4880 ?
"expected a 23-bit unsigned offset for buffer ops"
4881 :
isGFX12Plus() ?
"expected a 24-bit signed offset"
4882 : (
isVI() || IsBuffer) ?
"expected a 20-bit unsigned offset"
4883 :
"expected a 21-bit signed offset");
4888bool AMDGPUAsmParser::validateSOPLiteral(
const MCInst &Inst,
4891 const MCInstrDesc &
Desc = MII.
get(Opcode);
4895 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4896 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4898 const int OpIndices[] = { Src0Idx, Src1Idx };
4900 unsigned NumExprs = 0;
4901 unsigned NumLiterals = 0;
4904 for (
int OpIdx : OpIndices) {
4905 if (
OpIdx == -1)
break;
4911 std::optional<int64_t>
Imm;
4914 }
else if (MO.
isExpr()) {
4923 if (!
Imm.has_value()) {
4925 }
else if (!isInlineConstant(Inst,
OpIdx)) {
4929 if (NumLiterals == 0 || LiteralValue !=
Value) {
4937 if (NumLiterals + NumExprs <= 1)
4940 Error(getOperandLoc(Operands, Src1Idx),
4941 "only one unique literal operand is allowed");
4945bool AMDGPUAsmParser::validateOpSel(
const MCInst &Inst) {
4948 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4958 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4959 if (OpSelIdx != -1) {
4963 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
4964 if (OpSelHiIdx != -1) {
4973 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4983 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4984 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
4985 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4986 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
4988 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4989 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
4995 auto VerifyOneSGPR = [
OpSel, OpSelHi](
unsigned Index) ->
bool {
4997 return ((OpSel & Mask) == 0) && ((OpSelHi &
Mask) == 0);
5007 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
5008 if (Src2Idx != -1) {
5009 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
5019bool AMDGPUAsmParser::validateTrue16OpSel(
const MCInst &Inst) {
5020 if (!hasTrue16Insts())
5022 const MCRegisterInfo *MRI = getMRI();
5024 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
5030 if (OpSelOpValue == 0)
5032 unsigned OpCount = 0;
5033 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5034 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5035 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), OpName);
5042 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5043 if (OpSelOpIsHi != VGPRSuffixIsHi)
5052bool AMDGPUAsmParser::validateNeg(
const MCInst &Inst, AMDGPU::OpName OpName) {
5053 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5066 int NegIdx = AMDGPU::getNamedOperandIdx(
Opc, OpName);
5077 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5078 AMDGPU::OpName::src1_modifiers,
5079 AMDGPU::OpName::src2_modifiers};
5081 for (
unsigned i = 0; i < 3; ++i) {
5091bool AMDGPUAsmParser::validateDPP(
const MCInst &Inst,
5094 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp_ctrl);
5095 if (DppCtrlIdx >= 0) {
5102 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5103 Error(S,
isGFX12() ?
"DP ALU dpp only supports row_share"
5104 :
"DP ALU dpp only supports row_newbcast");
5109 int Dpp8Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp8);
5110 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5113 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
5115 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
5118 Error(getOperandLoc(Operands, Src1Idx),
5119 "invalid operand for instruction");
5123 Error(getInstLoc(Operands),
5124 "src1 immediate operand invalid for instruction");
// Accepts Reg only when it is the VCC form matching the current wave size:
// AMDGPU::VCC when isWave64() holds, AMDGPU::VCC_LO when isWave32() holds.
// Every other register, or a mismatched pairing, yields false.
5134bool AMDGPUAsmParser::validateVccOperand(MCRegister
Reg)
const {
5135 return (
Reg == AMDGPU::VCC && isWave64()) ||
5136 (
Reg == AMDGPU::VCC_LO && isWave32());
5140bool AMDGPUAsmParser::validateVOPLiteral(
const MCInst &Inst,
5143 const MCInstrDesc &
Desc = MII.
get(Opcode);
5144 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5146 !HasMandatoryLiteral && !
isVOPD(Opcode))
5151 std::optional<unsigned> LiteralOpIdx;
5154 for (
int OpIdx : OpIndices) {
5164 std::optional<int64_t>
Imm;
5170 bool IsAnotherLiteral =
false;
5171 bool IsForcedLit = findMCOperand(Operands,
OpIdx).isForcedLit();
5172 bool IsForcedLit64 = findMCOperand(Operands,
OpIdx).isForcedLit64();
5173 if (!
Imm.has_value()) {
5175 IsAnotherLiteral =
true;
5176 }
else if (IsForcedLit || IsForcedLit64 || !isInlineConstant(Inst,
OpIdx)) {
5181 HasMandatoryLiteral);
5189 (IsForcedLit64 && !HasMandatoryLiteral)) &&
5190 (!has64BitLiterals() ||
Desc.getSize() != 4)) {
5192 "invalid operand for instruction");
5197 if (!IsForcedFP64 && (IsForcedLit64 || !IsValid32Op) &&
5198 OpIdx != getNamedOperandIdx(Opcode, OpName::src0)) {
5200 "invalid operand for instruction");
5204 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5211 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5212 !getFeatureBits()[FeatureVOP3Literal]) {
5214 "literal operands are not supported");
5218 if (LiteralOpIdx && IsAnotherLiteral) {
5219 Error(getLaterLoc(getOperandLoc(Operands,
OpIdx),
5220 getOperandLoc(Operands, *LiteralOpIdx)),
5221 "only one unique literal operand is allowed");
5225 if (IsAnotherLiteral)
5226 LiteralOpIdx =
OpIdx;
// Checks that the data and destination operands of a load/store use a
// consistent register file (VGPR vs AGPR).
// NOTE(review): the early-return lines and the DataAreg definition are not
// part of this listing. The IsAGPROperand() result is presumably negative when
// the operand is absent, 0 for a VGPR and 1 for an AGPR - confirm against its
// definition.
5249bool AMDGPUAsmParser::validateAGPRLdSt(
const MCInst &Inst)
const {
// The data operand is named either data0 or vdata (selector not visible here).
5257 ? AMDGPU::OpName::data0
5258 : AMDGPU::OpName::vdata;
5260 const MCRegisterInfo *MRI = getMRI();
5261 int DstAreg =
IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
// When a second data operand (data1) exists it must agree with the first.
5265 int Data2Areg =
IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5266 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5270 auto FB = getFeatureBits();
// With GFX90A instructions: when both operands are present they must be of the
// same kind.
5271 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5272 if (DataAreg < 0 || DstAreg < 0)
5274 return DstAreg == DataAreg;
// Otherwise neither operand may report a value of 1 or more.
5277 return DstAreg < 1 && DataAreg < 1;
// Checks register alignment of VGPR/AGPR operands; it is only active when the
// subtarget sets FeatureRequiresAlignedVGPRs.
// NOTE(review): most return statements and the final operand loop are not
// part of this listing, so the exact pass/fail conditions cannot be restated
// here.
5280bool AMDGPUAsmParser::validateVGPRAlign(
const MCInst &Inst)
const {
5281 auto FB = getFeatureBits();
5282 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5286 const MCRegisterInfo *MRI = getMRI();
// DS_READ_B96_TR_B6 gets special handling on GFX90A-feature targets.
5289 if (FB[AMDGPU::FeatureGFX90AInsts] &&
Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
// GFX1250: the TR6_B96 loads get opcode-specific handling.
5292 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5296 case AMDGPU::DS_LOAD_TR6_B96:
5297 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5301 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5302 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5306 int VAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
5307 if (VAddrIdx != -1) {
// Tests whether the register index relative to VGPR0 is odd.
5310 if ((
Sub - AMDGPU::VGPR0) & 1)
5315 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5316 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
// 32-bit VGPR and AGPR register classes (their use is outside this listing).
5321 const MCRegisterClass &VGPR32 = MRI->
getRegClass(AMDGPU::VGPR_32RegClassID);
5322 const MCRegisterClass &AGPR32 = MRI->
getRegClass(AMDGPU::AGPR_32RegClassID);
// Scans the parsed operands, starting at index 1 (index 0 is skipped), and
// returns the start location of a matching operand.
// NOTE(review): the match condition and the fallback return are not part of
// this listing; by the function name the match is presumably the blgp operand.
5341SMLoc AMDGPUAsmParser::getBLGPLoc(
const OperandVector &Operands)
const {
5342 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
5343 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
5345 return Op.getStartLoc();
// Checks that the blgp operand was spelled the way the opcode requires: the
// same operand can be written either as blgp or as neg.
// NOTE(review): early returns and the statement that sets UsesNeg inside the
// opcode switch are not part of this listing.
5350bool AMDGPUAsmParser::validateBLGP(
const MCInst &Inst,
5353 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
5356 SMLoc BLGPLoc = getBLGPLoc(Operands);
// The spelling is recovered from the source text at the operand location.
5359 bool IsNeg = StringRef(BLGPLoc.
getPointer()).starts_with(
"neg:");
5360 auto FB = getFeatureBits();
5361 bool UsesNeg =
false;
// On GFX940-feature targets the F64 MFMA opcodes below are singled out
// (presumably they are the ones that use the neg spelling).
5362 if (FB[AMDGPU::FeatureGFX940Insts]) {
5364 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5365 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5366 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5367 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
// The spelling used must agree with the spelling expected.
5372 if (IsNeg == UsesNeg)
5376 UsesNeg ?
"invalid modifier: blgp is not supported"
5377 :
"invalid modifier: neg is not supported");
// For the four GFX11 S_WAITCNT_* opcodes listed below, requires a register
// operand to be SGPR_NULL and reports an error otherwise. Other opcodes are
// not subject to this check.
// NOTE(review): the variable is called Src0Idx and the diagnostic says
// "src0", but the operand index looked up is OpName::sdst - confirm whether
// the naming/message is intentional.
5382bool AMDGPUAsmParser::validateWaitCnt(
const MCInst &Inst,
5388 if (
Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5389 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5390 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5391 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5394 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
5397 if (
Reg == AMDGPU::SGPR_NULL)
5400 Error(getOperandLoc(Operands, Src0Idx),
"src0 must be null");
// DS-instruction checks: on one path the result of validateGWS() is returned
// directly; on another the gds modifier is rejected with a diagnostic placed
// at the gds immediate's location.
// NOTE(review): the conditions selecting each path are not part of this
// listing.
5404bool AMDGPUAsmParser::validateDS(
const MCInst &Inst,
5410 return validateGWS(Inst, Operands);
5415 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::gds);
5420 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5421 Error(S,
"gds modifier is not supported on this GPU");
// Alignment check for the data0 register of DS_GWS_INIT, DS_GWS_BARRIER and
// DS_GWS_SEMA_BR. It is only performed when FeatureGFX90AInsts is set; a
// failure is reported as "vgpr must be even aligned" at the data0 operand.
// NOTE(review): the return statements and the parity test on RegIdx are not
// part of this listing.
5429bool AMDGPUAsmParser::validateGWS(
const MCInst &Inst,
5431 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5435 if (
Opc != AMDGPU::DS_GWS_INIT_vi &&
Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5436 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5439 const MCRegisterInfo *MRI = getMRI();
5440 const MCRegisterClass &VGPR32 = MRI->
getRegClass(AMDGPU::VGPR_32RegClassID);
5442 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::data0);
// Register index relative to the first register of its file: VGPR0 when the
// register is in the 32-bit VGPR class, AGPR0 otherwise.
5445 auto RegIdx =
Reg - (VGPR32.
contains(
Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5447 Error(getOperandLoc(Operands, Data0Pos),
"vgpr must be even aligned");
// Validates the cache-policy (cpol) operand of an instruction. Visible
// diagnostics cover: scale_offset / nv not supported on the GPU or for the
// instruction, cache policy on SMRD/SMEM instructions, the scc modifier, and
// whether the instruction must or must not use glc (or sc0 on GFX940).
// One path delegates to validateTHAndScopeBits().
// NOTE(review): the conditions guarding each diagnostic and the return
// statements are not part of this listing.
5454bool AMDGPUAsmParser::validateCoherencyBits(
const MCInst &Inst,
5457 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(),
5458 AMDGPU::OpName::cpol);
5466 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5469 Error(S,
"scale_offset is not supported on this GPU");
5472 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5475 Error(S,
"nv is not supported on this GPU");
5480 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5483 Error(S,
"scale_offset is not supported for this instruction");
5487 return validateTHAndScopeBits(Inst, Operands, CPol);
5492 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5493 Error(S,
"cache policy is not supported for SMRD instructions");
// Unlike the other diagnostics, this one is attached to the instruction
// location (IDLoc) rather than to the cpol operand.
5497 Error(IDLoc,
"invalid cache policy for SMEM instruction");
5506 if (!(TSFlags & AllowSCCModifier)) {
5507 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5511 "scc modifier is not supported for this instruction on this GPU");
5522 :
"instruction must use glc");
5527 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
// Points into the operand text at the offending modifier name: sc0 on GFX940,
// glc otherwise.
5530 &CStr.data()[CStr.find(
isGFX940() ?
"sc0" :
"glc")]);
5532 :
"instruction must not use glc");
// Validates the th (temporal hint) and scope fields of the cache-policy value
// CPol for the given instruction. Each failure returns through PrintError with
// one of the messages below.
// NOTE(review): PrintError is presumably a local helper that reports at
// location S; its definition and all guarding conditions are not part of this
// listing.
5540bool AMDGPUAsmParser::validateTHAndScopeBits(
const MCInst &Inst,
5542 const unsigned CPol) {
5546 const unsigned Opcode = Inst.
getOpcode();
5547 const MCInstrDesc &TID = MII.
get(Opcode);
5550 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5557 return PrintError(
"th:TH_ATOMIC_RETURN requires a destination operand");
5562 return PrintError(
"instruction must use th:TH_ATOMIC_RETURN");
5570 return PrintError(
"invalid th value for SMEM instruction");
5577 return PrintError(
"scope and th combination is not valid");
5583 return PrintError(
"invalid th value for atomic instructions");
5586 return PrintError(
"invalid th value for store instructions");
5589 return PrintError(
"invalid th value for load instructions");
// Rejects the TFE modifier on store instructions.
// The error is only emitted when getImmLoc() returns something other than the
// instruction location, i.e. (presumably) when a tfe operand was actually
// written in the source - confirm against getImmLoc().
// NOTE(review): the second half of the store condition and the return
// statements are not part of this listing.
5595bool AMDGPUAsmParser::validateTFE(
const MCInst &Inst,
5598 if (
Desc.mayStore() &&
5600 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5601 if (Loc != getInstLoc(Operands)) {
5602 Error(Loc,
"TFE modifier has no meaning for store instructions");
// Checks that the register tuples used for the WMMA matrix sources are
// consistent with the matrix format operands: matrix_a_fmt is checked against
// src0 and matrix_b_fmt against src1. Both checks must pass.
// NOTE(review): the size comparison inside validateFmt is not part of this
// listing.
5610bool AMDGPUAsmParser::validateWMMA(
const MCInst &Inst,
// Validates one (format operand, source operand) pair; on mismatch an error is
// reported at the source operand.
5616 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) ->
bool {
5617 int FmtIdx = AMDGPU::getNamedOperandIdx(
Opc, FmtOp);
5621 int SrcIdx = AMDGPU::getNamedOperandIdx(
Opc, SrcOp);
5629 Error(getOperandLoc(Operands, SrcIdx),
5630 "wrong register tuple size for " +
// src1 is only checked when the src0 check succeeded (short-circuit &&).
5635 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5636 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
// Runs the individual target-specific validators on a matched instruction, in
// the fixed order shown below.
// Validators that receive Operands (or IDLoc) have no diagnostic here and
// appear to report their own; for validators that only take Inst, the
// diagnostic is emitted in this function.
// NOTE(review): the return statements after each check are not part of this
// listing.
5639bool AMDGPUAsmParser::validateInstruction(
const MCInst &Inst, SMLoc IDLoc,
5641 if (!validateLdsDirect(Inst, Operands))
5643 if (!validateTrue16OpSel(Inst)) {
5644 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5645 "op_sel operand conflicts with 16-bit operand suffix");
5648 if (!validateSOPLiteral(Inst, Operands))
5650 if (!validateVOPLiteral(Inst, Operands)) {
5653 if (!validateConstantBusLimitations(Inst, Operands)) {
5656 if (!validateVOPD(Inst, Operands)) {
5659 if (!validateIntClampSupported(Inst)) {
5660 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5661 "integer clamping is not supported on this GPU");
5664 if (!validateOpSel(Inst)) {
5665 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5666 "invalid op_sel operand");
5669 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5670 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5671 "invalid neg_lo operand");
5674 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5675 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5676 "invalid neg_hi operand");
5679 if (!validateDPP(Inst, Operands)) {
5683 if (!validateMIMGD16(Inst)) {
5684 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5685 "d16 modifier is not supported on this GPU");
5688 if (!validateMIMGDim(Inst, Operands)) {
5689 Error(IDLoc,
"missing dim operand");
// NOTE(review): the r128 diagnostic below is located via ImmTyD16 rather than
// an r128-related immediate type - confirm this is intentional.
5692 if (!validateTensorR128(Inst)) {
5693 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5694 "instruction must set modifier r128=0");
5697 if (!validateMIMGMSAA(Inst)) {
5698 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5699 "invalid dim; must be MSAA type");
5702 if (!validateMIMGDataSize(Inst, IDLoc)) {
5705 if (!validateMIMGAddrSize(Inst, IDLoc))
5707 if (!validateMIMGAtomicDMask(Inst)) {
5708 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5709 "invalid atomic image dmask");
5712 if (!validateMIMGGatherDMask(Inst)) {
5713 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5714 "invalid image_gather dmask: only one bit must be set");
5717 if (!validateMovrels(Inst, Operands)) {
5720 if (!validateOffset(Inst, Operands)) {
5723 if (!validateMAIAccWrite(Inst, Operands)) {
5726 if (!validateMAISrc2(Inst, Operands)) {
5729 if (!validateMFMA(Inst, Operands)) {
5732 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
// The AGPR load/store message depends on whether the target has GFX90A
// instructions.
5736 if (!validateAGPRLdSt(Inst)) {
5737 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5738 ?
"invalid register class: data and dst should be all VGPR or AGPR"
5739 :
"invalid register class: agpr loads and stores not supported on this GPU"
5743 if (!validateVGPRAlign(Inst)) {
5745 "invalid register class: vgpr tuples must be 64 bit aligned");
5748 if (!validateDS(Inst, Operands)) {
5752 if (!validateBLGP(Inst, Operands)) {
5756 if (!validateDivScale(Inst)) {
5757 Error(IDLoc,
"ABS not allowed in VOP3B instructions");
5760 if (!validateWaitCnt(Inst, Operands)) {
5763 if (!validateTFE(Inst, Operands)) {
5766 if (!validateWMMA(Inst, Operands)) {
5775 unsigned VariantID = 0);
5779 unsigned VariantID);
// Two overloads of isSupportedMnemo. The second takes a mnemonic, a feature
// set and a list of assembler variants, and iterates over those variants.
// NOTE(review): the remaining parameters and body of the first overload, and
// the per-variant test and return statements of the second, are not part of
// this listing. Presumably the result tells whether the mnemonic exists for
// the given features in any of the variants - confirm.
5781bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
5786bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5787 const FeatureBitset &FBS,
5788 ArrayRef<unsigned> Variants) {
5789 for (
auto Variant : Variants) {
// Produces a specific diagnostic for a mnemonic that did not match. Visible
// cases, in order:
//   - the mnemonic is supported for the matched variants: no diagnosis here;
//   - it is supported in general but not in the requested variant;
//   - on GFX10+ in wave64-only mode, it would be supported with the wave-size
//     feature bits flipped to wave32: "instruction requires wavesize=32";
//   - it is supported with every feature bit set: "instruction not supported
//     on this GPU";
//   - otherwise: "invalid instruction" plus a suggestion string.
// NOTE(review): several return statements and the Suggestion computation are
// not part of this listing.
5797bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5799 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5802 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
// Errors queued so far are dropped before the specific diagnosis is emitted.
5807 getParser().clearPendingErrors();
5811 StringRef VariantName = getMatchedVariantName();
5812 if (!VariantName.
empty() && isSupportedMnemo(Mnemo, FBS)) {
5815 " variant of this instruction is not supported"));
5819 if (
isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5820 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
// Re-evaluate availability with wave64 cleared and wave32 set.
5822 FeatureBitset FeaturesWS32 = getFeatureBits();
5823 FeaturesWS32.
flip(AMDGPU::FeatureWavefrontSize64)
5824 .
flip(AMDGPU::FeatureWavefrontSize32);
5825 FeatureBitset AvailableFeaturesWS32 =
5826 ComputeAvailableFeatures(FeaturesWS32);
5828 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5829 return Error(IDLoc,
"instruction requires wavesize=32");
// FeatureBitset().set() enables every feature bit.
5833 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5834 return Error(IDLoc,
"instruction not supported on this GPU (" +
5835 getSTI().
getCPU() +
")" +
": " + Mnemo);
5840 return Error(IDLoc,
"invalid instruction" + Suggestion);
5846 const auto &
Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5847 if (
Op.isToken() && InvalidOprIdx > 1) {
5848 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5849 return PrevOp.isToken() && PrevOp.getToken() ==
"::";
// Tries to match the parsed operands against each assembler variant returned
// by getMatchedVariants(), keeps the most specific match status, and on
// success validates the instruction; otherwise a diagnostic is produced from
// the retained status.
// NOTE(review): the emission of the matched instruction, the statements inside
// the status-update branch and several returns are not part of this listing.
5854bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc,
unsigned &Opcode,
5857 uint64_t &ErrorInfo,
5858 bool MatchingInlineAsm) {
5861 unsigned Result = Match_Success;
5862 for (
auto Variant : getMatchedVariants()) {
5864 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
// Status priority when combining results across variants:
// Success / MissingFeature always qualify; InvalidOperand qualifies unless a
// MissingFeature was already retained; MnemonicFail qualifies only when
// neither InvalidOperand nor MissingFeature was retained.
5869 if (R == Match_Success || R == Match_MissingFeature ||
5870 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5871 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5872 Result != Match_MissingFeature)) {
5876 if (R == Match_Success)
5880 if (Result == Match_Success) {
5881 if (!validateInstruction(Inst, IDLoc, Operands)) {
// Operands[0] holds the mnemonic token.
5888 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
5889 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5895 case Match_MissingFeature:
5899 return Error(IDLoc,
"operands are not valid for this GPU or mode");
5901 case Match_InvalidOperand: {
5902 SMLoc ErrorLoc = IDLoc;
// ErrorInfo of ~0ULL means no operand index is available; an index at or past
// the operand count means an operand is missing.
5903 if (ErrorInfo != ~0ULL) {
5904 if (ErrorInfo >= Operands.
size()) {
5905 return Error(IDLoc,
"too few operands for instruction");
5907 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5908 if (ErrorLoc == SMLoc())
5912 return Error(ErrorLoc,
"invalid VOPDY instruction");
5914 return Error(ErrorLoc,
"invalid operand for instruction");
5917 case Match_MnemonicFail:
// Parses an absolute expression with the generic parser and stores it in Ret,
// converted to uint32_t with static_cast (values outside the 32-bit range are
// therefore narrowed, not rejected, by the visible code).
// NOTE(review): the Tmp declaration and the return statements are not part of
// this listing.
5923bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5928 if (getParser().parseAbsoluteExpression(Tmp)) {
5931 Ret =
static_cast<uint32_t
>(Tmp);
// Handles the .amdgcn_target directive: parses the quoted target-id string and
// reports an error when it differs from the target id held by the target
// streamer. The directive is rejected outright on non-amdgcn triples.
// NOTE(review): the return statements are not part of this listing.
5935bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5936 if (!getSTI().getTargetTriple().isAMDGCN())
5937 return TokError(
"directive only supported for amdgcn architecture");
5939 std::string TargetIDDirective;
5940 SMLoc TargetStart = getTok().getLoc();
5941 if (getParser().parseEscapedString(TargetIDDirective))
// Source range covering the parsed string, used to place the diagnostic.
5944 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5945 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
5946 return getParser().Error(TargetRange.
Start,
5947 (Twine(
".amdgcn_target directive's target id ") +
5948 Twine(TargetIDDirective) +
5949 Twine(
" does not match the specified target id ") +
5950 Twine(getTargetStreamer().getTargetID()->
toString())).str());
// Error helper taking the source range of an offending value. Callers in this
// file return its result directly when a directive value is out of range.
// NOTE(review): the body is not part of this listing.
5955bool AMDGPUAsmParser::OutOfRangeError(SMRange
Range) {
// Computes the VGPRBlocks / SGPRBlocks expressions (output parameters) from
// the next-free VGPR/SGPR expressions, taking VCC / flat-scratch / XNACK
// usage, the wave size and the subtarget features into account.
// Returns through OutOfRangeError(SGPRRange) when the SGPR count can be
// evaluated and exceeds MaxAddressableNumSGPRs.
// NOTE(review): many statements (Version, Ctx, MaxAddressableNumSGPRs setup,
// the extra-SGPR computation, the granule arithmetic and the final return) are
// not part of this listing.
5959bool AMDGPUAsmParser::calculateGPRBlocks(
5960 const FeatureBitset &Features,
const MCExpr *VCCUsed,
5961 const MCExpr *FlatScrUsed,
bool XNACKUsed,
5962 std::optional<bool> EnableWavefrontSize32,
const MCExpr *NextFreeVGPR,
5963 SMRange VGPRRange,
const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5964 const MCExpr *&VGPRBlocks,
const MCExpr *&SGPRBlocks) {
5970 const MCExpr *
NumSGPRs = NextFreeSGPR;
5971 int64_t EvaluatedSGPRs;
// Range check applied for Version.Major >= 8 without the SGPR init bug.
5978 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
Version.Major >= 8 &&
5979 !Features.
test(FeatureSGPRInitBug) &&
5980 static_cast<uint64_t
>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5981 return OutOfRangeError(SGPRRange);
5983 const MCExpr *ExtraSGPRs =
// Range check applied for Version.Major <= 7 or with the SGPR init bug; it
// comes after the ExtraSGPRs computation above.
5987 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5988 (
Version.Major <= 7 || Features.
test(FeatureSGPRInitBug)) &&
5989 static_cast<uint64_t
>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5990 return OutOfRangeError(SGPRRange);
5992 if (Features.
test(FeatureSGPRInitBug))
// Helper building the block-count expression from a register count and an
// allocation granule.
5999 auto GetNumGPRBlocks = [&Ctx](
const MCExpr *NumGPR,
6000 unsigned Granule) ->
const MCExpr * {
6004 const MCExpr *AlignToGPR =
6006 const MCExpr *DivGPR =
6012 VGPRBlocks = GetNumGPRBlocks(
// Parses a .amdhsa_kernel ... .end_amdhsa_kernel block: reads the kernel name,
// then a sequence of .amdhsa_* directives, each followed by an expression, and
// fills the kernel descriptor KD accordingly. After the block ends it checks
// required directives and cross-field constraints, and finally hands the
// descriptor to the target streamer.
// NOTE(review): this listing omits many statements of the function (the loop
// header, the Seen set, the PARSE_BITS_ENTRY invocations' first lines, most
// guarding conditions and returns). The comments below describe only what the
// visible lines establish.
6021bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
6022 if (!getSTI().getTargetTriple().isAMDGCN())
6023 return TokError(
"directive only supported for amdgcn architecture");
6026 return TokError(
"directive only supported for amdhsa OS");
6028 StringRef KernelName;
6029 if (getParser().parseIdentifier(KernelName))
6032 AMDGPU::MCKernelDescriptor KD =
// Values used when the corresponding directive does not appear in the block.
6044 const MCExpr *NextFreeVGPR = ZeroExpr;
6046 const MCExpr *NamedBarCnt = ZeroExpr;
6047 uint64_t SharedVGPRCount = 0;
6048 uint64_t PreloadLength = 0;
6049 uint64_t PreloadOffset = 0;
6051 const MCExpr *NextFreeSGPR = ZeroExpr;
// Number of user SGPRs implied by the enabled .amdhsa_user_sgpr_* directives.
6054 unsigned ImpliedUserSGPRCount = 0;
// Set only when .amdhsa_user_sgpr_count is given explicitly.
6058 std::optional<unsigned> ExplicitUserSGPRCount;
6059 const MCExpr *ReserveVCC = OneExpr;
6060 const MCExpr *ReserveFlatScr = OneExpr;
6061 std::optional<bool> EnableWavefrontSize32;
// One directive: identifier followed by a value expression.
6067 SMRange IDRange = getTok().getLocRange();
6068 if (!parseId(
ID,
"expected .amdhsa_ directive or .end_amdhsa_kernel"))
6071 if (
ID ==
".end_amdhsa_kernel")
6075 return TokError(
".amdhsa_ directives cannot be repeated");
6077 SMLoc ValStart = getLoc();
6078 const MCExpr *ExprVal;
6079 if (getParser().parseExpression(ExprVal))
6081 SMLoc ValEnd = getLoc();
6082 SMRange ValRange = SMRange(ValStart, ValEnd);
6085 uint64_t Val = IVal;
// EvaluatableExpr records whether the expression resolved to an absolute
// value at parse time.
6086 bool EvaluatableExpr;
6087 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6089 return OutOfRangeError(ValRange);
// Local helper macros: PARSE_BITS_ENTRY range-checks Val against the field
// width and stores VALUE into FIELD; EXPR_RESOLVE_OR_ERROR rejects directives
// whose expression could not be resolved.
6093#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6094 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6095 return OutOfRangeError(RANGE); \
6096 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6101#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6103 return Error(IDRange.Start, "directive should have resolvable expression", \
6106 if (
ID ==
".amdhsa_group_segment_fixed_size") {
6109 return OutOfRangeError(ValRange);
6111 }
else if (
ID ==
".amdhsa_private_segment_fixed_size") {
6114 return OutOfRangeError(ValRange);
6116 }
else if (
ID ==
".amdhsa_kernarg_size") {
6118 return OutOfRangeError(ValRange);
6120 }
else if (
ID ==
".amdhsa_user_sgpr_count") {
6122 ExplicitUserSGPRCount = Val;
// The user-SGPR directives below add their register count to
// ImpliedUserSGPRCount in the visible lines.
6123 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
6127 "directive is not supported with architected flat scratch",
6130 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6133 ImpliedUserSGPRCount += 4;
6134 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
6137 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6140 return OutOfRangeError(ValRange);
6144 ImpliedUserSGPRCount += Val;
6145 PreloadLength = Val;
6147 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
6150 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6153 return OutOfRangeError(ValRange);
6157 PreloadOffset = Val;
6158 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
6161 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6164 ImpliedUserSGPRCount += 2;
6165 }
else if (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
6168 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6171 ImpliedUserSGPRCount += 2;
6172 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
6175 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6178 ImpliedUserSGPRCount += 2;
6179 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
6182 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6185 ImpliedUserSGPRCount += 2;
6186 }
else if (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
6189 "directive is not supported with architected flat scratch",
6193 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6196 ImpliedUserSGPRCount += 2;
6197 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
6200 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6203 ImpliedUserSGPRCount += 1;
6204 }
else if (
ID ==
".amdhsa_wavefront_size32") {
6206 if (IVersion.
Major < 10)
6207 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6208 EnableWavefrontSize32 = Val;
6210 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6212 }
else if (
ID ==
".amdhsa_uses_dynamic_stack") {
6214 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6216 }
else if (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6219 "directive is not supported with architected flat scratch",
6222 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6224 }
else if (
ID ==
".amdhsa_enable_private_segment") {
6228 "directive is not supported without architected flat scratch",
6231 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6233 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
6235 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6237 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
6239 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6241 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
6243 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6245 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
6247 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6249 }
else if (
ID ==
".amdhsa_system_vgpr_workitem_id") {
6251 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
// The register-count directives keep the unevaluated expression together with
// its source range for later diagnostics.
6253 }
else if (
ID ==
".amdhsa_next_free_vgpr") {
6254 VGPRRange = ValRange;
6255 NextFreeVGPR = ExprVal;
6256 }
else if (
ID ==
".amdhsa_next_free_sgpr") {
6257 SGPRRange = ValRange;
6258 NextFreeSGPR = ExprVal;
6259 }
else if (
ID ==
".amdhsa_accum_offset") {
6261 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6262 AccumOffset = ExprVal;
6263 }
else if (
ID ==
".amdhsa_named_barrier_count") {
6265 return Error(IDRange.
Start,
"directive requires gfx1250+", IDRange);
6266 NamedBarCnt = ExprVal;
6267 }
else if (
ID ==
".amdhsa_reserve_vcc") {
6269 return OutOfRangeError(ValRange);
6270 ReserveVCC = ExprVal;
6271 }
else if (
ID ==
".amdhsa_reserve_flat_scratch") {
6272 if (IVersion.
Major < 7)
6273 return Error(IDRange.
Start,
"directive requires gfx7+", IDRange);
6276 "directive is not supported with architected flat scratch",
6279 return OutOfRangeError(ValRange);
6280 ReserveFlatScr = ExprVal;
6281 }
else if (
ID ==
".amdhsa_reserve_xnack_mask") {
6282 if (IVersion.
Major < 8)
6283 return Error(IDRange.
Start,
"directive requires gfx8+", IDRange);
6285 return OutOfRangeError(ValRange);
// The directive value must agree with the XNACK setting of the target id.
6286 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6287 return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
6289 }
else if (
ID ==
".amdhsa_float_round_mode_32") {
6291 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6293 }
else if (
ID ==
".amdhsa_float_round_mode_16_64") {
6295 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6297 }
else if (
ID ==
".amdhsa_float_denorm_mode_32") {
6299 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6301 }
else if (
ID ==
".amdhsa_float_denorm_mode_16_64") {
6303 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6305 }
else if (
ID ==
".amdhsa_dx10_clamp") {
6306 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6307 return Error(IDRange.
Start,
"directive unsupported on gfx1170+",
6310 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6312 }
else if (
ID ==
".amdhsa_ieee_mode") {
6313 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6314 return Error(IDRange.
Start,
"directive unsupported on gfx1170+",
6317 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6319 }
else if (
ID ==
".amdhsa_fp16_overflow") {
6320 if (IVersion.
Major < 9)
6321 return Error(IDRange.
Start,
"directive requires gfx9+", IDRange);
6323 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6325 }
else if (
ID ==
".amdhsa_tg_split") {
6327 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6330 }
else if (
ID ==
".amdhsa_workgroup_processor_mode") {
6333 "directive unsupported on " + getSTI().
getCPU(), IDRange);
6335 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6337 }
else if (
ID ==
".amdhsa_memory_ordered") {
6338 if (IVersion.
Major < 10)
6339 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6341 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6343 }
else if (
ID ==
".amdhsa_forward_progress") {
6344 if (IVersion.
Major < 10)
6345 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6347 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6349 }
else if (
ID ==
".amdhsa_shared_vgpr_count") {
6351 if (IVersion.
Major < 10 || IVersion.
Major >= 12)
6352 return Error(IDRange.
Start,
"directive requires gfx10 or gfx11",
6354 SharedVGPRCount = Val;
6356 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6358 }
else if (
ID ==
".amdhsa_inst_pref_size") {
6359 if (IVersion.
Major < 11)
6360 return Error(IDRange.
Start,
"directive requires gfx11+", IDRange);
// The field location differs between gfx11 and later targets.
6361 if (IVersion.
Major == 11) {
6363 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6367 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6370 }
else if (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
6373 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6375 }
else if (
ID ==
".amdhsa_exception_fp_denorm_src") {
6377 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6379 }
else if (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
6382 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6384 }
else if (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
6386 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6388 }
else if (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
6390 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6392 }
else if (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
6394 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6396 }
else if (
ID ==
".amdhsa_exception_int_div_zero") {
6398 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6400 }
else if (
ID ==
".amdhsa_round_robin_scheduling") {
6401 if (IVersion.
Major < 12)
6402 return Error(IDRange.
Start,
"directive requires gfx12+", IDRange);
6404 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6407 return Error(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
6410#undef PARSE_BITS_ENTRY
// Post-block validation: both next-free register directives are mandatory.
6413 if (!Seen.
contains(
".amdhsa_next_free_vgpr"))
6414 return TokError(
".amdhsa_next_free_vgpr directive is required");
6416 if (!Seen.
contains(
".amdhsa_next_free_sgpr"))
6417 return TokError(
".amdhsa_next_free_sgpr directive is required");
// An explicit user SGPR count takes precedence over the implied one.
6419 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6421 return TokError(
"too many user SGPRs enabled, found " +
6422 Twine(UserSGPRCount) +
", but only " +
6428 if (PreloadLength) {
6434 const MCExpr *VGPRBlocks;
6435 const MCExpr *SGPRBlocks;
6436 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6437 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6438 EnableWavefrontSize32, NextFreeVGPR,
6439 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
// Block counts are range-checked only when they can be evaluated now.
6443 int64_t EvaluatedVGPRBlocks;
6444 bool VGPRBlocksEvaluatable =
6445 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6446 if (VGPRBlocksEvaluatable &&
6448 static_cast<uint64_t
>(EvaluatedVGPRBlocks))) {
6449 return OutOfRangeError(VGPRRange);
6453 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6454 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT,
getContext());
6456 int64_t EvaluatedSGPRBlocks;
6457 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6459 static_cast<uint64_t
>(EvaluatedSGPRBlocks)))
6460 return OutOfRangeError(SGPRRange);
6463 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6464 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
getContext());
// NOTE(review): the message below says "amdgpu_user_sgpr_count" while the
// directive parsed above is ".amdhsa_user_sgpr_count" - confirm the wording.
6466 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6467 return TokError(
"amdgpu_user_sgpr_count smaller than implied by "
6468 "enabled user SGPRs");
// The user SGPR count field has a different location on GFX125 targets.
6474 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6475 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
getContext());
6480 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6481 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
getContext());
6486 return TokError(
"Kernarg size should be resolvable");
// Preload length and offset are multiplied by 4 before being compared with the
// kernarg size (presumably dwords vs bytes - confirm). A kernarg size of 0
// skips the check.
6487 uint64_t kernarg_size = IVal;
6488 if (PreloadLength && kernarg_size &&
6489 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6490 return TokError(
"Kernarg preload length + offset is larger than the "
6491 "kernarg segment size");
6494 if (!Seen.
contains(
".amdhsa_accum_offset"))
6495 return TokError(
".amdhsa_accum_offset directive is required");
// NOTE(review): EvaluatedAccum is copied into UEvaluatedAccum before
// AccumEvaluatable is tested; if evaluateAsAbsolute() leaves its argument
// untouched on failure this reads an uninitialized value - confirm.
6496 int64_t EvaluatedAccum;
6497 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6498 uint64_t UEvaluatedAccum = EvaluatedAccum;
// accum_offset must be a multiple of 4 within [4, 256].
6499 if (AccumEvaluatable &&
6500 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6501 return TokError(
"accum_offset should be in range [4..256] in "
6504 int64_t EvaluatedNumVGPR;
6505 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6508 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6509 return TokError(
"accum_offset exceeds total VGPR allocation");
6515 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6516 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6522 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6523 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
// Shared-VGPR constraints apply to gfx10 and gfx11 only.
6526 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
6528 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6529 return TokError(
"shared_vgpr_count directive not valid on "
6530 "wavefront size 32");
6533 if (VGPRBlocksEvaluatable &&
6534 (SharedVGPRCount * 2 +
static_cast<uint64_t
>(EvaluatedVGPRBlocks) >
6536 return TokError(
"shared_vgpr_count*2 + "
6537 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
// Hand the completed descriptor to the target streamer.
6542 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6543 NextFreeVGPR, NextFreeSGPR,
6544 ReserveVCC, ReserveFlatScr);
// Parses an absolute expression as the code object version and forwards it to
// the target streamer.
// NOTE(review): the Version declaration and the return statements are not part
// of this listing.
6548bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6550 if (ParseAsAbsoluteExpression(
Version))
6553 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(
Version);
// Parses one "name = value" entry of an amd_kernel_code_t block into C and
// cross-checks the wavefront-size related fields against the subtarget.
// NOTE(review): the conditions guarding each diagnostic and the return
// statements are not part of this listing.
6557bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef
ID,
6558 AMDGPUMCKernelCodeT &
C) {
// For this field the rest of the statement is consumed without being parsed
// into C by the visible code.
6561 if (
ID ==
"max_scratch_backing_memory_byte_size") {
6562 Parser.eatToEndOfStatement();
// Parse errors from ParseKernelCodeT are collected in ErrStr and reported via
// TokError.
6566 SmallString<40> ErrStr;
6567 raw_svector_ostream Err(ErrStr);
6568 if (!
C.ParseKernelCodeT(
ID, getParser(), Err)) {
6569 return TokError(Err.
str());
6573 if (
ID ==
"enable_wavefront_size32") {
6576 return TokError(
"enable_wavefront_size32=1 is only allowed on GFX10+");
6578 return TokError(
"enable_wavefront_size32=1 requires +WavefrontSize32");
6581 return TokError(
"enable_wavefront_size32=0 requires +WavefrontSize64");
// wavefront_size values 5 and 6 are paired with the wave32 and wave64
// diagnostics respectively (presumably log2 of the wave size - confirm).
6585 if (
ID ==
"wavefront_size") {
6586 if (
C.wavefront_size == 5) {
6588 return TokError(
"wavefront_size=5 is only allowed on GFX10+");
6590 return TokError(
"wavefront_size=5 requires +WavefrontSize32");
6591 }
else if (
C.wavefront_size == 6) {
6593 return TokError(
"wavefront_size=6 requires +WavefrontSize64");
// Parses an amd_kernel_code_t block: reads identifiers until
// .end_amd_kernel_code_t, passing each one to ParseAMDKernelCodeTValue(), then
// emits the accumulated KernelCode through the target streamer.
// NOTE(review): the loop header, the KernelCode initialisation and the return
// statements are not part of this listing.
6600bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6601 AMDGPUMCKernelCodeT KernelCode;
6610 if (!parseId(
ID,
"expected value identifier or .end_amd_kernel_code_t"))
6613 if (
ID ==
".end_amd_kernel_code_t")
6616 if (ParseAMDKernelCodeTValue(
ID, KernelCode))
6621 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
// Parses a symbol name and asks the target streamer to emit a symbol type for
// it.
// NOTE(review): the second argument of EmitAMDGPUSymbolType and the return
// statements are not part of this listing.
6626bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6627 StringRef KernelName;
6628 if (!parseId(KernelName,
"expected symbol name"))
6631 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
// Handles the .amd_amdgpu_isa directive: only available on amdgcn triples;
// the string operand must equal the target id held by the target streamer,
// after which the ISA version is emitted.
// NOTE(review): the token consumption and return statements are not part of
// this listing.
6638bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6639 if (!getSTI().getTargetTriple().isAMDGCN()) {
6640 return Error(getLoc(),
6641 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6645 auto TargetIDDirective = getLexer().getTok().getStringContents();
6646 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
6647 return Error(getParser().getTok().getLoc(),
"target id must match options");
6649 getTargetStreamer().EmitISAVersion();
// Collects the HSA metadata text into HSAMetadataString and passes it to
// EmitHSAMetadataV3(); a failure there is reported as "invalid HSA metadata".
// NOTE(review): the statement that fills HSAMetadataString and the return
// statements are not part of this listing.
6655bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6658 std::string HSAMetadataString;
6663 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6664 return Error(getLoc(),
"invalid HSA metadata");
// Collects raw source text into CollectString until the directive named by
// AssemblerDirectiveEnd is found; reports an error if it is never found.
// NOTE(review): the loop structure, the use of AssemblerDirectiveBegin and the
// return statements are not part of this listing.
6671bool AMDGPUAsmParser::ParseToEndDirective(
const char *AssemblerDirectiveBegin,
6672 const char *AssemblerDirectiveEnd,
6673 std::string &CollectString) {
6675 raw_string_ostream CollectStream(CollectString);
// Whitespace skipping is disabled while collecting and re-enabled afterwards.
6677 getLexer().setSkipSpace(
false);
6679 bool FoundEnd =
false;
6682 CollectStream << getTokenStr();
6686 if (trySkipId(AssemblerDirectiveEnd)) {
// Each collected statement is followed by the assembler's statement
// separator string.
6691 CollectStream << Parser.parseStringToEndOfStatement()
6692 <<
getContext().getAsmInfo().getSeparatorString();
6694 Parser.eatToEndOfStatement();
6697 getLexer().setSkipSpace(
true);
6700 return TokError(Twine(
"expected directive ") +
6701 Twine(AssemblerDirectiveEnd) + Twine(
" not found"));
6708bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6714 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6715 if (!PALMetadata->setFromString(
String))
6716 return Error(getLoc(),
"invalid PAL metadata");
6721bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6723 return Error(getLoc(),
6725 "not available on non-amdpal OSes")).str());
6728 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6729 PALMetadata->setLegacy();
6732 if (ParseAsAbsoluteExpression(
Key)) {
6733 return TokError(Twine(
"invalid value in ") +
6737 return TokError(Twine(
"expected an even number of values in ") +
6740 if (ParseAsAbsoluteExpression(
Value)) {
6741 return TokError(Twine(
"invalid value in ") +
6744 PALMetadata->setRegister(
Key,
Value);
6753bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6754 if (getParser().checkForValidSection())
6758 SMLoc NameLoc = getLoc();
6759 if (getParser().parseIdentifier(Name))
6760 return TokError(
"expected identifier in directive");
6763 if (getParser().parseComma())
6769 SMLoc SizeLoc = getLoc();
6770 if (getParser().parseAbsoluteExpression(
Size))
6773 return Error(SizeLoc,
"size must be non-negative");
6774 if (
Size > LocalMemorySize)
6775 return Error(SizeLoc,
"size is too large");
6777 int64_t Alignment = 4;
6779 SMLoc AlignLoc = getLoc();
6780 if (getParser().parseAbsoluteExpression(Alignment))
6783 return Error(AlignLoc,
"alignment must be a power of two");
6788 if (Alignment >= 1u << 31)
6789 return Error(AlignLoc,
"alignment is too large");
6795 Symbol->redefineIfPossible();
6796 if (!
Symbol->isUndefined())
6797 return Error(NameLoc,
"invalid symbol redefinition");
6799 getTargetStreamer().emitAMDGPULDS(Symbol,
Size,
Align(Alignment));
6803bool AMDGPUAsmParser::ParseDirectiveAMDGPUInfo() {
6804 if (getParser().checkForValidSection())
6808 if (getParser().parseIdentifier(FuncName))
6809 return TokError(
"expected symbol name after .amdgpu_info");
6812 AMDGPU::InfoSectionData ParsedInfoData;
6813 AMDGPU::FuncInfo FI;
6815 bool HasScalarAttrs =
false;
6822 SMLoc IDLoc = getLoc();
6823 if (!parseId(
ID,
"expected directive or .end_amdgpu_info"))
6826 if (
ID ==
".end_amdgpu_info")
6834 return Error(IDLoc,
"unknown .amdgpu_info directive '" +
ID +
"'");
6836 if (Dir ==
"flags") {
6838 if (getParser().parseAbsoluteExpression(Val))
6841 FI.
UsesVCC = !!(
Flags & AMDGPU::FuncInfoFlags::FUNC_USES_VCC);
6843 !!(
Flags & AMDGPU::FuncInfoFlags::FUNC_USES_FLAT_SCRATCH);
6845 HasScalarAttrs =
true;
6846 }
else if (Dir ==
"num_sgpr") {
6848 if (getParser().parseAbsoluteExpression(Val))
6850 FI.
NumSGPR =
static_cast<uint32_t
>(Val);
6851 HasScalarAttrs =
true;
6852 }
else if (Dir ==
"num_vgpr") {
6854 if (getParser().parseAbsoluteExpression(Val))
6857 HasScalarAttrs =
true;
6858 }
else if (Dir ==
"num_agpr") {
6860 if (getParser().parseAbsoluteExpression(Val))
6863 HasScalarAttrs =
true;
6864 }
else if (Dir ==
"private_segment_size") {
6866 if (getParser().parseAbsoluteExpression(Val))
6869 HasScalarAttrs =
true;
6870 }
else if (Dir ==
"use") {
6872 if (getParser().parseIdentifier(ResName))
6873 return TokError(
"expected resource symbol for .amdgpu_use");
6874 ParsedInfoData.
Uses.push_back(
6875 {FuncSym,
getContext().getOrCreateSymbol(ResName)});
6876 }
else if (Dir ==
"call") {
6878 if (getParser().parseIdentifier(DstName))
6879 return TokError(
"expected callee symbol for .amdgpu_call");
6880 ParsedInfoData.
Calls.push_back(
6881 {FuncSym,
getContext().getOrCreateSymbol(DstName)});
6882 }
else if (Dir ==
"indirect_call") {
6884 if (getParser().parseEscapedString(TypeId))
6885 return TokError(
"expected type ID string for .amdgpu_indirect_call");
6886 ParsedInfoData.
IndirectCalls.push_back({FuncSym, std::move(TypeId)});
6887 }
else if (Dir ==
"typeid") {
6889 if (getParser().parseEscapedString(TypeId))
6890 return TokError(
"expected type ID string for .amdgpu_typeid");
6891 ParsedInfoData.
TypeIds.push_back({FuncSym, std::move(TypeId)});
6893 return Error(IDLoc,
"unknown .amdgpu_info directive '" +
ID +
"'");
6898 ParsedInfoData.
Funcs.push_back(std::move(FI));
6900 AMDGPU::InfoSectionData &
Data = InfoData ? *InfoData : InfoData.emplace();
6901 for (AMDGPU::FuncInfo &Func : ParsedInfoData.
Funcs)
6902 Data.Funcs.push_back(std::move(Func));
6903 for (std::pair<MCSymbol *, MCSymbol *> &Use : ParsedInfoData.
Uses)
6904 Data.Uses.push_back(Use);
6905 for (std::pair<MCSymbol *, MCSymbol *> &
Call : ParsedInfoData.
Calls)
6907 for (std::pair<MCSymbol *, std::string> &
IndirectCall :
6910 for (std::pair<MCSymbol *, std::string> &TypeId : ParsedInfoData.
TypeIds)
6911 Data.TypeIds.push_back(std::move(TypeId));
6916void AMDGPUAsmParser::onEndOfFile() {
6918 getTargetStreamer().emitAMDGPUInfo(*InfoData);
6921bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6922 StringRef IDVal = DirectiveID.
getString();
6925 if (IDVal ==
".amdhsa_kernel")
6926 return ParseDirectiveAMDHSAKernel();
6928 if (IDVal ==
".amdhsa_code_object_version")
6929 return ParseDirectiveAMDHSACodeObjectVersion();
6933 return ParseDirectiveHSAMetadata();
6935 if (IDVal ==
".amd_kernel_code_t")
6936 return ParseDirectiveAMDKernelCodeT();
6938 if (IDVal ==
".amdgpu_hsa_kernel")
6939 return ParseDirectiveAMDGPUHsaKernel();
6941 if (IDVal ==
".amd_amdgpu_isa")
6942 return ParseDirectiveISAVersion();
6946 Twine(
" directive is "
6947 "not available on non-amdhsa OSes"))
6952 if (IDVal ==
".amdgcn_target")
6953 return ParseDirectiveAMDGCNTarget();
6955 if (IDVal ==
".amdgpu_lds")
6956 return ParseDirectiveAMDGPULDS();
6958 if (IDVal ==
".amdgpu_info")
6959 return ParseDirectiveAMDGPUInfo();
6962 return ParseDirectivePALMetadataBegin();
6965 return ParseDirectivePALMetadata();
6970bool AMDGPUAsmParser::subtargetHasRegister(
const MCRegisterInfo &MRI,
6977 return hasSGPR104_SGPR105();
6980 case SRC_SHARED_BASE_LO:
6981 case SRC_SHARED_BASE:
6982 case SRC_SHARED_LIMIT_LO:
6983 case SRC_SHARED_LIMIT:
6984 case SRC_PRIVATE_BASE_LO:
6985 case SRC_PRIVATE_BASE:
6986 case SRC_PRIVATE_LIMIT_LO:
6987 case SRC_PRIVATE_LIMIT:
6989 case SRC_FLAT_SCRATCH_BASE_LO:
6990 case SRC_FLAT_SCRATCH_BASE_HI:
6991 return hasGloballyAddressableScratch();
6992 case SRC_POPS_EXITING_WAVE_ID:
7004 return (
isVI() ||
isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
7034 return hasSGPR102_SGPR103();
7039ParseStatus AMDGPUAsmParser::parseOperand(
OperandVector &Operands,
7042 ParseStatus Res = parseVOPD(Operands);
7047 Res = MatchOperandParserImpl(Operands, Mnemonic);
7059 SMLoc LBraceLoc = getLoc();
7064 auto Loc = getLoc();
7065 Res = parseReg(Operands);
7067 Error(Loc,
"expected a register");
7071 RBraceLoc = getLoc();
7076 "expected a comma or a closing square bracket"))
7080 if (Operands.
size() - Prefix > 1) {
7082 AMDGPUOperand::CreateToken(
this,
"[", LBraceLoc));
7083 Operands.
push_back(AMDGPUOperand::CreateToken(
this,
"]", RBraceLoc));
7089 return parseRegOrImm(Operands);
7092StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
7094 setForcedEncodingSize(0);
7095 setForcedDPP(
false);
7096 setForcedSDWA(
false);
7098 if (
Name.consume_back(
"_e64_dpp")) {
7100 setForcedEncodingSize(64);
7103 if (
Name.consume_back(
"_e64")) {
7104 setForcedEncodingSize(64);
7107 if (
Name.consume_back(
"_e32")) {
7108 setForcedEncodingSize(32);
7111 if (
Name.consume_back(
"_dpp")) {
7115 if (
Name.consume_back(
"_sdwa")) {
7116 setForcedSDWA(
true);
7124 unsigned VariantID);
7130 Name = parseMnemonicSuffix(Name);
7136 Operands.
push_back(AMDGPUOperand::CreateToken(
this, Name, NameLoc));
7138 bool IsMIMG = Name.starts_with(
"image_");
7141 OperandMode
Mode = OperandMode_Default;
7143 Mode = OperandMode_NSA;
7147 checkUnsupportedInstruction(Name, NameLoc);
7148 if (!Parser.hasPendingError()) {
7151 :
"not a valid operand.";
7152 Error(getLoc(), Msg);
7171ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7174 if (!trySkipId(Name))
7177 Operands.
push_back(AMDGPUOperand::CreateToken(
this, Name, S));
7181ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
const char *Prefix,
7190ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7191 const char *Prefix,
OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7192 std::function<
bool(int64_t &)> ConvertResult) {
7196 ParseStatus Res = parseIntWithPrefix(Prefix,
Value);
7200 if (ConvertResult && !ConvertResult(
Value)) {
7201 Error(S,
"invalid " + StringRef(Prefix) +
" value.");
7204 Operands.
push_back(AMDGPUOperand::CreateImm(
this,
Value, S, ImmTy));
7208ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7209 const char *Prefix,
OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7210 bool (*ConvertResult)(int64_t &)) {
7219 const unsigned MaxSize = 4;
7223 for (
int I = 0; ; ++
I) {
7225 SMLoc Loc = getLoc();
7229 if (
Op != 0 &&
Op != 1)
7230 return Error(Loc,
"invalid " + StringRef(Prefix) +
" value.");
7237 if (
I + 1 == MaxSize)
7238 return Error(getLoc(),
"expected a closing square bracket");
7244 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Val, S, ImmTy));
7248ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7250 AMDGPUOperand::ImmTy ImmTy,
7251 bool IgnoreNegative) {
7255 if (trySkipId(Name)) {
7257 }
else if (trySkipId(
"no", Name)) {
7266 return Error(S,
"r128 modifier is not supported on this GPU");
7267 if (Name ==
"a16" && !
hasA16())
7268 return Error(S,
"a16 modifier is not supported on this GPU");
7270 if (Bit == 0 && Name ==
"gds") {
7271 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
7273 return Error(S,
"nogds is not allowed");
7276 if (
isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7277 ImmTy = AMDGPUOperand::ImmTyR128A16;
7279 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Bit, S, ImmTy));
7283unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7284 bool &Disabling)
const {
7285 Disabling =
Id.consume_front(
"no");
7288 return StringSwitch<unsigned>(Id)
7295 return StringSwitch<unsigned>(Id)
7303ParseStatus AMDGPUAsmParser::parseCPol(
OperandVector &Operands) {
7305 SMLoc StringLoc = getLoc();
7307 int64_t CPolVal = 0;
7316 ResTH = parseTH(Operands, TH);
7327 ResScope = parseScope(Operands, Scope);
7340 if (trySkipId(
"nv")) {
7344 }
else if (trySkipId(
"no",
"nv")) {
7351 if (trySkipId(
"scale_offset")) {
7355 }
else if (trySkipId(
"no",
"scale_offset")) {
7368 Operands.
push_back(AMDGPUOperand::CreateImm(
this, CPolVal, StringLoc,
7369 AMDGPUOperand::ImmTyCPol));
7373 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
7374 SMLoc OpLoc = getLoc();
7375 unsigned Enabled = 0, Seen = 0;
7379 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7386 return Error(S,
"dlc modifier is not supported on this GPU");
7389 return Error(S,
"scc modifier is not supported on this GPU");
7392 return Error(S,
"duplicate cache policy modifier");
7404 AMDGPUOperand::CreateImm(
this,
Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7408ParseStatus AMDGPUAsmParser::parseScope(
OperandVector &Operands,
7413 ParseStatus Res = parseStringOrIntWithPrefix(
7414 Operands,
"scope", {
"SCOPE_CU",
"SCOPE_SE",
"SCOPE_DEV",
"SCOPE_SYS"},
7423ParseStatus AMDGPUAsmParser::parseTH(
OperandVector &Operands, int64_t &TH) {
7428 ParseStatus Res = parseStringWithPrefix(
"th",
Value, StringLoc);
7432 if (
Value ==
"TH_DEFAULT")
7434 else if (
Value ==
"TH_STORE_LU" ||
Value ==
"TH_LOAD_WB" ||
7435 Value ==
"TH_LOAD_NT_WB") {
7436 return Error(StringLoc,
"invalid th value");
7437 }
else if (
Value.consume_front(
"TH_ATOMIC_")) {
7439 }
else if (
Value.consume_front(
"TH_LOAD_")) {
7441 }
else if (
Value.consume_front(
"TH_STORE_")) {
7444 return Error(StringLoc,
"invalid th value");
7447 if (
Value ==
"BYPASS")
7452 TH |= StringSwitch<int64_t>(
Value)
7462 .Default(0xffffffff);
7464 TH |= StringSwitch<int64_t>(
Value)
7475 .Default(0xffffffff);
7478 if (TH == 0xffffffff)
7479 return Error(StringLoc,
"invalid th value");
7486 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7487 AMDGPUOperand::ImmTy ImmT, int64_t
Default = 0,
7488 std::optional<unsigned> InsertAt = std::nullopt) {
7489 auto i = OptionalIdx.find(ImmT);
7490 if (i != OptionalIdx.end()) {
7491 unsigned Idx = i->second;
7492 const AMDGPUOperand &
Op =
7493 static_cast<const AMDGPUOperand &
>(*Operands[Idx]);
7497 Op.addImmOperands(Inst, 1);
7499 if (InsertAt.has_value())
7506ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7512 StringLoc = getLoc();
7517ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7518 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7523 SMLoc StringLoc = getLoc();
7527 Value = getTokenStr();
7531 if (
Value == Ids[IntVal])
7536 if (IntVal < 0 || IntVal >= (int64_t)Ids.
size())
7537 return Error(StringLoc,
"invalid " + Twine(Name) +
" value");
7542ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7543 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7544 AMDGPUOperand::ImmTy
Type) {
7548 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7550 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S,
Type));
7559bool AMDGPUAsmParser::tryParseFmt(
const char *Pref,
7563 SMLoc Loc = getLoc();
7565 auto Res = parseIntWithPrefix(Pref, Val);
7571 if (Val < 0 || Val > MaxVal) {
7572 Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7580ParseStatus AMDGPUAsmParser::tryParseIndexKey(
OperandVector &Operands,
7581 AMDGPUOperand::ImmTy ImmTy) {
7582 const char *Pref =
"index_key";
7584 SMLoc Loc = getLoc();
7585 auto Res = parseIntWithPrefix(Pref, ImmVal);
7589 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7590 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7591 (ImmVal < 0 || ImmVal > 1))
7592 return Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7594 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7595 return Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7597 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, ImmTy));
7601ParseStatus AMDGPUAsmParser::parseIndexKey8bit(
OperandVector &Operands) {
7602 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7605ParseStatus AMDGPUAsmParser::parseIndexKey16bit(
OperandVector &Operands) {
7606 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7609ParseStatus AMDGPUAsmParser::parseIndexKey32bit(
OperandVector &Operands) {
7610 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7613ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(
OperandVector &Operands,
7615 AMDGPUOperand::ImmTy
Type) {
7620ParseStatus AMDGPUAsmParser::parseMatrixAFMT(
OperandVector &Operands) {
7621 return tryParseMatrixFMT(Operands,
"matrix_a_fmt",
7622 AMDGPUOperand::ImmTyMatrixAFMT);
7625ParseStatus AMDGPUAsmParser::parseMatrixBFMT(
OperandVector &Operands) {
7626 return tryParseMatrixFMT(Operands,
"matrix_b_fmt",
7627 AMDGPUOperand::ImmTyMatrixBFMT);
7630ParseStatus AMDGPUAsmParser::tryParseMatrixScale(
OperandVector &Operands,
7632 AMDGPUOperand::ImmTy
Type) {
7637ParseStatus AMDGPUAsmParser::parseMatrixAScale(
OperandVector &Operands) {
7638 return tryParseMatrixScale(Operands,
"matrix_a_scale",
7639 AMDGPUOperand::ImmTyMatrixAScale);
7642ParseStatus AMDGPUAsmParser::parseMatrixBScale(
OperandVector &Operands) {
7643 return tryParseMatrixScale(Operands,
"matrix_b_scale",
7644 AMDGPUOperand::ImmTyMatrixBScale);
7647ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(
OperandVector &Operands,
7649 AMDGPUOperand::ImmTy
Type) {
7654ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(
OperandVector &Operands) {
7655 return tryParseMatrixScaleFmt(Operands,
"matrix_a_scale_fmt",
7656 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7659ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(
OperandVector &Operands) {
7660 return tryParseMatrixScaleFmt(Operands,
"matrix_b_scale_fmt",
7661 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7666ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &
Format) {
7667 using namespace llvm::AMDGPU::MTBUFFormat;
7673 for (
int I = 0;
I < 2; ++
I) {
7674 if (Dfmt == DFMT_UNDEF && !tryParseFmt(
"dfmt", DFMT_MAX, Dfmt))
7677 if (Nfmt == NFMT_UNDEF && !tryParseFmt(
"nfmt", NFMT_MAX, Nfmt))
7682 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7688 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7691 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7692 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7698ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &
Format) {
7699 using namespace llvm::AMDGPU::MTBUFFormat;
7703 if (!tryParseFmt(
"format", UFMT_MAX, Fmt))
7706 if (Fmt == UFMT_UNDEF)
7713bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7715 StringRef FormatStr,
7717 using namespace llvm::AMDGPU::MTBUFFormat;
7721 if (
Format != DFMT_UNDEF) {
7727 if (
Format != NFMT_UNDEF) {
7732 Error(Loc,
"unsupported format");
7736ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7739 using namespace llvm::AMDGPU::MTBUFFormat;
7743 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7748 SMLoc Loc = getLoc();
7749 if (!parseId(Str,
"expected a format string") ||
7750 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7752 if (Dfmt == DFMT_UNDEF)
7753 return Error(Loc,
"duplicate numeric format");
7754 if (Nfmt == NFMT_UNDEF)
7755 return Error(Loc,
"duplicate data format");
7758 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7759 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7763 if (Ufmt == UFMT_UNDEF)
7764 return Error(FormatLoc,
"unsupported format");
7773ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7776 using namespace llvm::AMDGPU::MTBUFFormat;
7779 if (Id == UFMT_UNDEF)
7783 return Error(Loc,
"unified format is not supported on this GPU");
7789ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &
Format) {
7790 using namespace llvm::AMDGPU::MTBUFFormat;
7791 SMLoc Loc = getLoc();
7796 return Error(Loc,
"out of range format");
7801ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &
Format) {
7802 using namespace llvm::AMDGPU::MTBUFFormat;
7808 StringRef FormatStr;
7809 SMLoc Loc = getLoc();
7810 if (!parseId(FormatStr,
"expected a format string"))
7813 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc,
Format);
7815 Res = parseSymbolicSplitFormat(FormatStr, Loc,
Format);
7825 return parseNumericFormat(
Format);
7828ParseStatus AMDGPUAsmParser::parseFORMAT(
OperandVector &Operands) {
7829 using namespace llvm::AMDGPU::MTBUFFormat;
7833 SMLoc Loc = getLoc();
7843 AMDGPUOperand::CreateImm(
this,
Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7855 Res = parseRegOrImm(Operands);
7862 Res = parseSymbolicOrNumericFormat(
Format);
7867 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands[
Size - 2]);
7868 assert(
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7875 return Error(getLoc(),
"duplicate format");
7879ParseStatus AMDGPUAsmParser::parseFlatOffset(
OperandVector &Operands) {
7881 parseIntWithPrefix(
"offset", Operands, AMDGPUOperand::ImmTyOffset);
7883 Res = parseIntWithPrefix(
"inst_offset", Operands,
7884 AMDGPUOperand::ImmTyInstOffset);
7889ParseStatus AMDGPUAsmParser::parseR128A16(
OperandVector &Operands) {
7891 parseNamedBit(
"r128", Operands, AMDGPUOperand::ImmTyR128A16);
7893 Res = parseNamedBit(
"a16", Operands, AMDGPUOperand::ImmTyA16);
7897ParseStatus AMDGPUAsmParser::parseBLGP(
OperandVector &Operands) {
7899 parseIntWithPrefix(
"blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7902 parseOperandArrayWithPrefix(
"neg", Operands, AMDGPUOperand::ImmTyBLGP);
7911void AMDGPUAsmParser::cvtExp(MCInst &Inst,
const OperandVector &Operands) {
7912 OptionalImmIndexMap OptionalIdx;
7914 unsigned OperandIdx[4];
7915 unsigned EnMask = 0;
7918 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
7919 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
7924 OperandIdx[SrcIdx] = Inst.
size();
7925 Op.addRegOperands(Inst, 1);
7932 OperandIdx[SrcIdx] = Inst.
size();
7938 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7939 Op.addImmOperands(Inst, 1);
7943 if (
Op.isToken() && (
Op.getToken() ==
"done" ||
Op.getToken() ==
"row_en"))
7947 OptionalIdx[
Op.getImmTy()] = i;
7953 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7960 for (
auto i = 0; i < SrcIdx; ++i) {
7962 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7987 IntVal =
encode(ISA, IntVal, CntVal);
7988 if (CntVal !=
decode(ISA, IntVal)) {
7990 IntVal =
encode(ISA, IntVal, -1);
7998bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
8000 SMLoc CntLoc = getLoc();
8001 StringRef CntName = getTokenStr();
8008 SMLoc ValLoc = getLoc();
8017 if (CntName ==
"vmcnt" || CntName ==
"vmcnt_sat") {
8019 }
else if (CntName ==
"expcnt" || CntName ==
"expcnt_sat") {
8021 }
else if (CntName ==
"lgkmcnt" || CntName ==
"lgkmcnt_sat") {
8024 Error(CntLoc,
"invalid counter name " + CntName);
8029 Error(ValLoc,
"too large value for " + CntName);
8038 Error(getLoc(),
"expected a counter name");
8046ParseStatus AMDGPUAsmParser::parseSWaitCnt(
OperandVector &Operands) {
8053 if (!parseCnt(Waitcnt))
8061 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Waitcnt, S));
8065bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
8066 SMLoc FieldLoc = getLoc();
8067 StringRef FieldName = getTokenStr();
8072 SMLoc ValueLoc = getLoc();
8079 if (FieldName ==
"instid0") {
8081 }
else if (FieldName ==
"instskip") {
8083 }
else if (FieldName ==
"instid1") {
8086 Error(FieldLoc,
"invalid field name " + FieldName);
8105 .Case(
"VALU_DEP_1", 1)
8106 .Case(
"VALU_DEP_2", 2)
8107 .Case(
"VALU_DEP_3", 3)
8108 .Case(
"VALU_DEP_4", 4)
8109 .Case(
"TRANS32_DEP_1", 5)
8110 .Case(
"TRANS32_DEP_2", 6)
8111 .Case(
"TRANS32_DEP_3", 7)
8112 .Case(
"FMA_ACCUM_CYCLE_1", 8)
8113 .Case(
"SALU_CYCLE_1", 9)
8114 .Case(
"SALU_CYCLE_2", 10)
8115 .Case(
"SALU_CYCLE_3", 11)
8123 Delay |=
Value << Shift;
8127ParseStatus AMDGPUAsmParser::parseSDelayALU(
OperandVector &Operands) {
8133 if (!parseDelay(Delay))
8141 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Delay, S));
8146AMDGPUOperand::isSWaitCnt()
const {
// Operand-class predicate paired with parseSDelayALU(), which pushes the
// encoded delay as a plain immediate operand. The check simply forwards to
// isImm(); nothing in this function validates the value itself.
8150bool AMDGPUOperand::isSDelayALU()
const {
return isImm(); }
8156void AMDGPUAsmParser::depCtrError(SMLoc Loc,
int ErrorId,
8157 StringRef DepCtrName) {
8160 Error(Loc, Twine(
"invalid counter name ", DepCtrName));
8163 Error(Loc, Twine(DepCtrName,
" is not supported on this GPU"));
8166 Error(Loc, Twine(
"duplicate counter name ", DepCtrName));
8169 Error(Loc, Twine(
"invalid value for ", DepCtrName));
8176bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr,
unsigned &UsedOprMask) {
8178 using namespace llvm::AMDGPU::DepCtr;
8180 SMLoc DepCtrLoc = getLoc();
8181 StringRef DepCtrName = getTokenStr();
8191 unsigned PrevOprMask = UsedOprMask;
8192 int CntVal =
encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8195 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8204 Error(getLoc(),
"expected a counter name");
8209 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8210 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8214ParseStatus AMDGPUAsmParser::parseDepCtr(
OperandVector &Operands) {
8215 using namespace llvm::AMDGPU::DepCtr;
8218 SMLoc Loc = getLoc();
8221 unsigned UsedOprMask = 0;
8223 if (!parseDepCtr(DepCtr, UsedOprMask))
8231 Operands.
push_back(AMDGPUOperand::CreateImm(
this, DepCtr, Loc));
// Operand-class predicate paired with parseDepCtr(OperandVector &), which
// pushes the combined DepCtr value as an immediate operand. The check forwards
// to isS16Imm().
// NOTE(review): isS16Imm() is defined elsewhere; presumably it restricts the
// immediate to a 16-bit value -- confirm against its definition.
8235bool AMDGPUOperand::isDepCtr()
const {
return isS16Imm(); }
8241ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8243 OperandInfoTy &Width) {
8244 using namespace llvm::AMDGPU::Hwreg;
8250 HwReg.Loc = getLoc();
8253 HwReg.IsSymbolic =
true;
8255 }
else if (!
parseExpr(HwReg.Val,
"a register name")) {
8263 if (!skipToken(
AsmToken::Comma,
"expected a comma or a closing parenthesis"))
8273 Width.Loc = getLoc();
8281ParseStatus AMDGPUAsmParser::parseHwreg(
OperandVector &Operands) {
8282 using namespace llvm::AMDGPU::Hwreg;
8285 SMLoc Loc = getLoc();
8287 StructuredOpField HwReg(
"id",
"hardware register", HwregId::Width,
8289 StructuredOpField
Offset(
"offset",
"bit offset", HwregOffset::Width,
8290 HwregOffset::Default);
8291 struct : StructuredOpField {
8292 using StructuredOpField::StructuredOpField;
8293 bool validate(AMDGPUAsmParser &Parser)
const override {
8295 return Error(Parser,
"only values from 1 to 32 are legal");
8298 } Width(
"size",
"bitfield width", HwregSize::Width, HwregSize::Default);
8299 ParseStatus Res = parseStructuredOpFields({&HwReg, &
Offset, &Width});
8302 Res = parseHwregFunc(HwReg,
Offset, Width);
8305 if (!validateStructuredOpFields({&HwReg, &
Offset, &Width}))
8307 ImmVal = HwregEncoding::encode(HwReg.Val,
Offset.Val, Width.Val);
8311 parseExpr(ImmVal,
"a hwreg macro, structured immediate"))
8318 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8320 AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8324bool AMDGPUOperand::isHwreg()
const {
8325 return isImmTy(ImmTyHwreg);
8333AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8335 OperandInfoTy &Stream) {
8336 using namespace llvm::AMDGPU::SendMsg;
8341 Msg.IsSymbolic =
true;
8343 }
else if (!
parseExpr(Msg.Val,
"a message name")) {
8348 Op.IsDefined =
true;
8351 (
Op.Val =
getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8354 }
else if (!
parseExpr(
Op.Val,
"an operation name")) {
8359 Stream.IsDefined =
true;
8360 Stream.Loc = getLoc();
8370AMDGPUAsmParser::validateSendMsg(
const OperandInfoTy &Msg,
8371 const OperandInfoTy &
Op,
8372 const OperandInfoTy &Stream) {
8373 using namespace llvm::AMDGPU::SendMsg;
8378 bool Strict = Msg.IsSymbolic;
8382 Error(Msg.Loc,
"specified message id is not supported on this GPU");
8387 Error(Msg.Loc,
"invalid message id");
8393 Error(
Op.Loc,
"message does not support operations");
8395 Error(Msg.Loc,
"missing message operation");
8401 Error(
Op.Loc,
"specified operation id is not supported on this GPU");
8403 Error(
Op.Loc,
"invalid operation id");
8408 Error(Stream.Loc,
"message operation does not support streams");
8412 Error(Stream.Loc,
"invalid message stream id");
8418ParseStatus AMDGPUAsmParser::parseSendMsg(
OperandVector &Operands) {
8419 using namespace llvm::AMDGPU::SendMsg;
8422 SMLoc Loc = getLoc();
8426 OperandInfoTy
Op(OP_NONE_);
8427 OperandInfoTy Stream(STREAM_ID_NONE_);
8428 if (parseSendMsgBody(Msg,
Op, Stream) &&
8429 validateSendMsg(Msg,
Op, Stream)) {
8434 }
else if (
parseExpr(ImmVal,
"a sendmsg macro")) {
8436 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8441 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8445bool AMDGPUOperand::isSendMsg()
const {
8446 return isImmTy(ImmTySendMsg);
8449ParseStatus AMDGPUAsmParser::parseWaitEvent(
OperandVector &Operands) {
8450 using namespace llvm::AMDGPU::WaitEvent;
8452 SMLoc Loc = getLoc();
8455 StructuredOpField DontWaitExportReady(
"dont_wait_export_ready",
"bit value",
8457 StructuredOpField ExportReady(
"export_ready",
"bit value", 1, 0);
8459 StructuredOpField *TargetBitfield =
8460 isGFX11() ? &DontWaitExportReady : &ExportReady;
8462 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8466 if (!validateStructuredOpFields({TargetBitfield}))
8468 ImmVal = TargetBitfield->Val;
8475 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8477 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc,
8478 AMDGPUOperand::ImmTyWaitEvent));
// Operand-class predicate paired with parseWaitEvent(), which creates its
// immediate with type AMDGPUOperand::ImmTyWaitEvent. Returns the result of
// isImmTy(ImmTyWaitEvent), i.e. matches on that immediate type tag.
8482bool AMDGPUOperand::isWaitEvent()
const {
return isImmTy(ImmTyWaitEvent); }
8488ParseStatus AMDGPUAsmParser::parseInterpSlot(
OperandVector &Operands) {
8495 int Slot = StringSwitch<int>(Str)
8502 return Error(S,
"invalid interpolation slot");
8504 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Slot, S,
8505 AMDGPUOperand::ImmTyInterpSlot));
8509ParseStatus AMDGPUAsmParser::parseInterpAttr(
OperandVector &Operands) {
8516 if (!Str.starts_with(
"attr"))
8517 return Error(S,
"invalid interpolation attribute");
8519 StringRef Chan = Str.take_back(2);
8520 int AttrChan = StringSwitch<int>(Chan)
8527 return Error(S,
"invalid or missing interpolation attribute channel");
8529 Str = Str.drop_back(2).drop_front(4);
8532 if (Str.getAsInteger(10, Attr))
8533 return Error(S,
"invalid or missing interpolation attribute number");
8536 return Error(S,
"out of bounds interpolation attribute number");
8540 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Attr, S,
8541 AMDGPUOperand::ImmTyInterpAttr));
8542 Operands.
push_back(AMDGPUOperand::CreateImm(
8543 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8551ParseStatus AMDGPUAsmParser::parseExpTgt(
OperandVector &Operands) {
8552 using namespace llvm::AMDGPU::Exp;
8562 return Error(S, (Id == ET_INVALID)
8563 ?
"invalid exp target"
8564 :
"exp target is not supported on this GPU");
8566 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Id, S,
8567 AMDGPUOperand::ImmTyExpTgt));
8576AMDGPUAsmParser::isId(
const AsmToken &Token,
const StringRef Id)
const {
8581AMDGPUAsmParser::isId(
const StringRef Id)
const {
8587 return getTokenKind() ==
Kind;
8590StringRef AMDGPUAsmParser::getId()
const {
8595AMDGPUAsmParser::trySkipId(
const StringRef Id) {
8604AMDGPUAsmParser::trySkipId(
const StringRef Pref,
const StringRef Id) {
8606 StringRef Tok = getTokenStr();
8617 if (isId(Id) && peekToken().is(Kind)) {
8627 if (isToken(Kind)) {
8636 const StringRef ErrMsg) {
8637 if (!trySkipToken(Kind)) {
8638 Error(getLoc(), ErrMsg);
8645AMDGPUAsmParser::parseExpr(int64_t &
Imm, StringRef Expected) {
8649 if (Parser.parseExpression(Expr))
8652 if (Expr->evaluateAsAbsolute(
Imm))
8655 if (Expected.empty()) {
8656 Error(S,
"expected absolute expression");
8658 Error(S, Twine(
"expected ", Expected) +
8659 Twine(
" or an absolute expression"));
8669 if (Parser.parseExpression(Expr))
8673 if (Expr->evaluateAsAbsolute(IntVal)) {
8674 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
8676 Operands.
push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
8682AMDGPUAsmParser::parseString(StringRef &Val,
const StringRef ErrMsg) {
8684 Val =
getToken().getStringContents();
8688 Error(getLoc(), ErrMsg);
8693AMDGPUAsmParser::parseId(StringRef &Val,
const StringRef ErrMsg) {
8695 Val = getTokenStr();
8699 if (!ErrMsg.
empty())
8700 Error(getLoc(), ErrMsg);
8705AMDGPUAsmParser::getToken()
const {
8706 return Parser.getTok();
8709AsmToken AMDGPUAsmParser::peekToken(
bool ShouldSkipSpace) {
8712 : getLexer().peekTok(ShouldSkipSpace);
8717 auto TokCount = getLexer().peekTokens(Tokens);
8719 for (
auto Idx = TokCount; Idx < Tokens.
size(); ++Idx)
8724AMDGPUAsmParser::getTokenKind()
const {
8725 return getLexer().getKind();
8729AMDGPUAsmParser::getLoc()
const {
8734AMDGPUAsmParser::getTokenStr()
const {
8739AMDGPUAsmParser::lex() {
8743const AMDGPUOperand &
8744AMDGPUAsmParser::findMCOperand(
const OperandVector &Operands,
8745 int MCOpIdx)
const {
8746 for (
const auto &
Op : Operands) {
8747 const AMDGPUOperand &TargetOp =
static_cast<AMDGPUOperand &
>(*Op);
8748 if (TargetOp.getMCOpIdx() == MCOpIdx)
8754SMLoc AMDGPUAsmParser::getInstLoc(
const OperandVector &Operands)
const {
8755 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8759SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8763SMLoc AMDGPUAsmParser::getOperandLoc(
const OperandVector &Operands,
8764 int MCOpIdx)
const {
8765 return findMCOperand(Operands, MCOpIdx).getStartLoc();
8769AMDGPUAsmParser::getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
8771 for (
unsigned i = Operands.
size() - 1; i > 0; --i) {
8772 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
8774 return Op.getStartLoc();
8776 return getInstLoc(Operands);
8780AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy
Type,
8782 auto Test = [=](
const AMDGPUOperand&
Op) {
return Op.isImmTy(
Type); };
8783 return getOperandLoc(
Test, Operands);
8797 StringRef
Id = getTokenStr();
8798 SMLoc IdLoc = getLoc();
8804 find_if(Fields, [Id](StructuredOpField *
F) {
return F->Id ==
Id; });
8805 if (
I == Fields.
end())
8806 return Error(IdLoc,
"unknown field");
8807 if ((*I)->IsDefined)
8808 return Error(IdLoc,
"duplicate field");
8811 (*I)->Loc = getLoc();
8814 (*I)->IsDefined =
true;
8821bool AMDGPUAsmParser::validateStructuredOpFields(
8823 return all_of(Fields, [
this](
const StructuredOpField *
F) {
8824 return F->validate(*
this);
8835 const unsigned OrMask,
8836 const unsigned XorMask) {
8845bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &
Op,
const unsigned MinVal,
8846 const unsigned MaxVal,
8847 const Twine &ErrMsg, SMLoc &Loc) {
8864AMDGPUAsmParser::parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
8865 const unsigned MinVal,
8866 const unsigned MaxVal,
8867 const StringRef ErrMsg) {
8869 for (
unsigned i = 0; i < OpNum; ++i) {
8870 if (!parseSwizzleOperand(
Op[i], MinVal, MaxVal, ErrMsg, Loc))
8878AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &
Imm) {
8879 using namespace llvm::AMDGPU::Swizzle;
8882 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8883 "expected a 2-bit lane id")) {
8894AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &
Imm) {
8895 using namespace llvm::AMDGPU::Swizzle;
8901 if (!parseSwizzleOperand(GroupSize,
8903 "group size must be in the interval [2,32]",
8908 Error(Loc,
"group size must be a power of two");
8911 if (parseSwizzleOperand(LaneIdx,
8913 "lane id must be in the interval [0,group size - 1]",
8922AMDGPUAsmParser::parseSwizzleReverse(int64_t &
Imm) {
8923 using namespace llvm::AMDGPU::Swizzle;
8928 if (!parseSwizzleOperand(GroupSize,
8930 "group size must be in the interval [2,32]",
8935 Error(Loc,
"group size must be a power of two");
8944AMDGPUAsmParser::parseSwizzleSwap(int64_t &
Imm) {
8945 using namespace llvm::AMDGPU::Swizzle;
8950 if (!parseSwizzleOperand(GroupSize,
8952 "group size must be in the interval [1,16]",
8957 Error(Loc,
"group size must be a power of two");
8966AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &
Imm) {
8967 using namespace llvm::AMDGPU::Swizzle;
8974 SMLoc StrLoc = getLoc();
8975 if (!parseString(Ctl)) {
8978 if (Ctl.
size() != BITMASK_WIDTH) {
8979 Error(StrLoc,
"expected a 5-character mask");
8983 unsigned AndMask = 0;
8984 unsigned OrMask = 0;
8985 unsigned XorMask = 0;
8987 for (
size_t i = 0; i < Ctl.
size(); ++i) {
8991 Error(StrLoc,
"invalid mask");
9012bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &
Imm) {
9013 using namespace llvm::AMDGPU::Swizzle;
9016 Error(getLoc(),
"FFT mode swizzle not supported on this GPU");
9022 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
9023 "FFT swizzle must be in the interval [0," +
9024 Twine(FFT_SWIZZLE_MAX) + Twine(
']'),
9032bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &
Imm) {
9033 using namespace llvm::AMDGPU::Swizzle;
9036 Error(getLoc(),
"Rotate mode swizzle not supported on this GPU");
9043 if (!parseSwizzleOperand(
Direction, 0, 1,
9044 "direction must be 0 (left) or 1 (right)", Loc))
9048 if (!parseSwizzleOperand(
9049 RotateSize, 0, ROTATE_MAX_SIZE,
9050 "number of threads to rotate must be in the interval [0," +
9051 Twine(ROTATE_MAX_SIZE) + Twine(
']'),
9056 (RotateSize << ROTATE_SIZE_SHIFT);
9061AMDGPUAsmParser::parseSwizzleOffset(int64_t &
Imm) {
9063 SMLoc OffsetLoc = getLoc();
9069 Error(OffsetLoc,
"expected a 16-bit offset");
9076AMDGPUAsmParser::parseSwizzleMacro(int64_t &
Imm) {
9077 using namespace llvm::AMDGPU::Swizzle;
9081 SMLoc ModeLoc = getLoc();
9084 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
9085 Ok = parseSwizzleQuadPerm(
Imm);
9086 }
else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
9087 Ok = parseSwizzleBitmaskPerm(
Imm);
9088 }
else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
9089 Ok = parseSwizzleBroadcast(
Imm);
9090 }
else if (trySkipId(IdSymbolic[ID_SWAP])) {
9091 Ok = parseSwizzleSwap(
Imm);
9092 }
else if (trySkipId(IdSymbolic[ID_REVERSE])) {
9093 Ok = parseSwizzleReverse(
Imm);
9094 }
else if (trySkipId(IdSymbolic[ID_FFT])) {
9095 Ok = parseSwizzleFFT(
Imm);
9096 }
else if (trySkipId(IdSymbolic[ID_ROTATE])) {
9097 Ok = parseSwizzleRotate(
Imm);
9099 Error(ModeLoc,
"expected a swizzle mode");
9102 return Ok && skipToken(
AsmToken::RParen,
"expected a closing parentheses");
9108ParseStatus AMDGPUAsmParser::parseSwizzle(
OperandVector &Operands) {
9112 if (trySkipId(
"offset")) {
9116 if (trySkipId(
"swizzle")) {
9117 Ok = parseSwizzleMacro(
Imm);
9119 Ok = parseSwizzleOffset(
Imm);
9123 Operands.
push_back(AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTySwizzle));
9131AMDGPUOperand::isSwizzle()
const {
9132 return isImmTy(ImmTySwizzle);
9139int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
9141 using namespace llvm::AMDGPU::VGPRIndexMode;
9153 for (
unsigned ModeId = ID_MIN; ModeId <=
ID_MAX; ++ModeId) {
9154 if (trySkipId(IdSymbolic[ModeId])) {
9162 "expected a VGPR index mode or a closing parenthesis" :
9163 "expected a VGPR index mode");
9168 Error(S,
"duplicate VGPR index mode");
9176 "expected a comma or a closing parenthesis"))
9183ParseStatus AMDGPUAsmParser::parseGPRIdxMode(
OperandVector &Operands) {
9185 using namespace llvm::AMDGPU::VGPRIndexMode;
9191 Imm = parseGPRIdxMacro();
9195 if (getParser().parseAbsoluteExpression(
Imm))
9198 return Error(S,
"invalid immediate: only 4-bit values are legal");
9202 AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9206bool AMDGPUOperand::isGPRIdxMode()
const {
9207 return isImmTy(ImmTyGprIdxMode);
9214ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(
OperandVector &Operands) {
9219 if (isRegister() || isModifier())
9225 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.
size() - 1]);
9226 assert(Opr.isImm() || Opr.isExpr());
9227 SMLoc Loc = Opr.getStartLoc();
9231 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9232 Error(Loc,
"expected an absolute expression or a label");
9233 }
else if (Opr.isImm() && !Opr.isS16Imm()) {
9234 Error(Loc,
"expected a 16-bit signed jump offset");
9244ParseStatus AMDGPUAsmParser::parseBoolReg(
OperandVector &Operands) {
9245 return parseReg(Operands);
9252void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9255 OptionalImmIndexMap OptionalIdx;
9256 unsigned FirstOperandIdx = 1;
9257 bool IsAtomicReturn =
false;
9264 for (
unsigned i = FirstOperandIdx, e = Operands.
size(); i != e; ++i) {
9265 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
9269 Op.addRegOperands(Inst, 1);
9273 if (IsAtomicReturn && i == FirstOperandIdx)
9274 Op.addRegOperands(Inst, 1);
9279 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9280 Op.addImmOperands(Inst, 1);
9292 OptionalIdx[
Op.getImmTy()] = i;
9306bool AMDGPUOperand::isSMRDOffset8()
const {
9310bool AMDGPUOperand::isSMEMOffset()
const {
9312 return isImmLiteral();
9315bool AMDGPUOperand::isSMRDLiteralOffset()
const {
9350bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9351 if (BoundCtrl == 0 || BoundCtrl == 1) {
9359void AMDGPUAsmParser::onBeginOfFile() {
9360 if (!getParser().getStreamer().getTargetStreamer() ||
9364 if (!getTargetStreamer().getTargetID())
9365 getTargetStreamer().initializeTargetID(getSTI(),
9366 getSTI().getFeatureString());
9369 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9377bool AMDGPUAsmParser::parsePrimaryExpr(
const MCExpr *&Res, SMLoc &EndLoc) {
9381 StringRef TokenId = getTokenStr();
9382 AGVK VK = StringSwitch<AGVK>(TokenId)
9383 .Case(
"max", AGVK::AGVK_Max)
9384 .Case(
"or", AGVK::AGVK_Or)
9385 .Case(
"extrasgprs", AGVK::AGVK_ExtraSGPRs)
9386 .Case(
"totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9387 .Case(
"alignto", AGVK::AGVK_AlignTo)
9388 .Case(
"occupancy", AGVK::AGVK_Occupancy)
9389 .Case(
"instprefsize", AGVK::AGVK_InstPrefSize)
9390 .Default(AGVK::AGVK_None);
9394 uint64_t CommaCount = 0;
9399 if (Exprs.
empty()) {
9401 "empty " + Twine(TokenId) +
" expression");
9404 if (CommaCount + 1 != Exprs.
size()) {
9406 "mismatch of commas in " + Twine(TokenId) +
" expression");
9413 if (getParser().parseExpression(Expr, EndLoc))
9417 if (LastTokenWasComma)
9421 "unexpected token in " + Twine(TokenId) +
" expression");
9427 return getParser().parsePrimaryExpr(Res, EndLoc,
nullptr);
9430ParseStatus AMDGPUAsmParser::parseOModSI(
OperandVector &Operands) {
9431 StringRef
Name = getTokenStr();
9432 if (Name ==
"mul") {
9433 return parseIntWithPrefix(
"mul", Operands,
9437 if (Name ==
"div") {
9438 return parseIntWithPrefix(
"div", Operands,
9449 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9454 const AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9455 AMDGPU::OpName::src2};
9463 int DstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
9468 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0_modifiers);
9470 if (
DstOp.isReg() &&
9475 if ((OpSel & (1 << SrcNum)) != 0)
9481void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9483 cvtVOP3P(Inst, Operands);
9487void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands,
9488 OptionalImmIndexMap &OptionalIdx) {
9489 cvtVOP3P(Inst, Operands, OptionalIdx);
9498 &&
Desc.NumOperands > (OpNum + 1)
9500 &&
Desc.operands()[OpNum + 1].RegClass != -1
9502 &&
Desc.getOperandConstraint(OpNum + 1,
9506void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst,
unsigned OpSel) {
9508 constexpr AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9509 AMDGPU::OpName::src2};
9510 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9511 AMDGPU::OpName::src1_modifiers,
9512 AMDGPU::OpName::src2_modifiers};
9513 for (
int J = 0; J < 3; ++J) {
9514 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc,
Ops[J]);
9520 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9523 if ((OpSel & (1 << J)) != 0)
9526 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9533void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst,
const OperandVector &Operands)
9535 OptionalImmIndexMap OptionalIdx;
9540 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9541 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9544 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9545 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9547 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9548 }
else if (
Op.isInterpSlot() ||
Op.isInterpAttr() ||
9549 Op.isInterpAttrChan()) {
9551 }
else if (
Op.isImmModifier()) {
9552 OptionalIdx[
Op.getImmTy()] =
I;
9560 AMDGPUOperand::ImmTyHigh);
9564 AMDGPUOperand::ImmTyClamp);
9568 AMDGPUOperand::ImmTyOModSI);
9573 AMDGPUOperand::ImmTyOpSel);
9574 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9577 cvtOpSelHelper(Inst, OpSel);
9581void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst,
const OperandVector &Operands)
9583 OptionalImmIndexMap OptionalIdx;
9588 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9589 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9592 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9593 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9595 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9596 }
else if (
Op.isImmModifier()) {
9597 OptionalIdx[
Op.getImmTy()] =
I;
9605 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9615 cvtOpSelHelper(Inst, OpSel);
9618void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9620 OptionalImmIndexMap OptionalIdx;
9623 int CbszOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
9627 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J)
9628 static_cast<AMDGPUOperand &
>(*Operands[
I++]).addRegOperands(Inst, 1);
9630 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9631 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands[
I]);
9636 if (NumOperands == CbszOpIdx) {
9641 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9642 }
else if (
Op.isImmModifier()) {
9643 OptionalIdx[
Op.getImmTy()] =
I;
9645 Op.addRegOrImmOperands(Inst, 1);
9650 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9651 if (CbszIdx != OptionalIdx.end()) {
9652 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).
getImm();
9656 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
9657 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9658 if (BlgpIdx != OptionalIdx.end()) {
9659 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).
getImm();
9670 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9671 if (OpselIdx != OptionalIdx.end()) {
9672 OpSel =
static_cast<const AMDGPUOperand &
>(*Operands[OpselIdx->second])
9676 unsigned OpSelHi = 0;
9677 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9678 if (OpselHiIdx != OptionalIdx.end()) {
9679 OpSelHi =
static_cast<const AMDGPUOperand &
>(*Operands[OpselHiIdx->second])
9682 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9683 AMDGPU::OpName::src1_modifiers};
9685 for (
unsigned J = 0; J < 2; ++J) {
9686 unsigned ModVal = 0;
9687 if (OpSel & (1 << J))
9689 if (OpSelHi & (1 << J))
9692 const int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9697void AMDGPUAsmParser::cvtVOP3(MCInst &Inst,
const OperandVector &Operands,
9698 OptionalImmIndexMap &OptionalIdx) {
9703 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9704 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9707 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9708 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9710 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9711 }
else if (
Op.isImmModifier()) {
9712 OptionalIdx[
Op.getImmTy()] =
I;
9714 Op.addRegOrImmOperands(Inst, 1);
9720 AMDGPUOperand::ImmTyScaleSel);
9724 AMDGPUOperand::ImmTyClamp);
9730 AMDGPUOperand::ImmTyByteSel);
9735 AMDGPUOperand::ImmTyOModSI);
9742 auto *it = Inst.
begin();
9743 std::advance(it, AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers));
9751void AMDGPUAsmParser::cvtVOP3(MCInst &Inst,
const OperandVector &Operands) {
9752 OptionalImmIndexMap OptionalIdx;
9753 cvtVOP3(Inst, Operands, OptionalIdx);
9756void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
const OperandVector &Operands,
9757 OptionalImmIndexMap &OptIdx) {
9763 if (
Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9764 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9765 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9766 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9767 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
9768 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
9769 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9770 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12 ||
9771 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx13 ||
9772 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx13) {
9781 int VdstInIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
9782 if (VdstInIdx != -1 && VdstInIdx ==
static_cast<int>(Inst.
getNumOperands()))
9785 int BitOp3Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::bitop3);
9786 if (BitOp3Idx != -1) {
9793 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9794 if (OpSelIdx != -1) {
9798 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
9799 if (OpSelHiIdx != -1) {
9800 int DefaultVal =
IsPacked ? -1 : 0;
9806 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_fmt);
9807 if (MatrixAFMTIdx != -1) {
9809 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9813 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_fmt);
9814 if (MatrixBFMTIdx != -1) {
9816 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9819 int MatrixAScaleIdx =
9820 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale);
9821 if (MatrixAScaleIdx != -1) {
9823 AMDGPUOperand::ImmTyMatrixAScale, 0);
9826 int MatrixBScaleIdx =
9827 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale);
9828 if (MatrixBScaleIdx != -1) {
9830 AMDGPUOperand::ImmTyMatrixBScale, 0);
9833 int MatrixAScaleFmtIdx =
9834 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9835 if (MatrixAScaleFmtIdx != -1) {
9837 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9840 int MatrixBScaleFmtIdx =
9841 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9842 if (MatrixBScaleFmtIdx != -1) {
9844 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9849 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9853 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9855 int NegLoIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_lo);
9859 int NegHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_hi);
9863 const AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9864 AMDGPU::OpName::src2};
9865 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9866 AMDGPU::OpName::src1_modifiers,
9867 AMDGPU::OpName::src2_modifiers};
9870 unsigned OpSelHi = 0;
9877 if (OpSelHiIdx != -1)
9886 for (
int J = 0; J < 3; ++J) {
9887 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc,
Ops[J]);
9891 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9896 uint32_t ModVal = 0;
9899 if (SrcOp.
isReg() && getMRI()
9906 if ((OpSel & (1 << J)) != 0)
9910 if ((OpSelHi & (1 << J)) != 0)
9913 if ((NegLo & (1 << J)) != 0)
9916 if ((NegHi & (1 << J)) != 0)
9923void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
const OperandVector &Operands) {
9924 OptionalImmIndexMap OptIdx;
9925 cvtVOP3(Inst, Operands, OptIdx);
9926 cvtVOP3P(Inst, Operands, OptIdx);
9930 unsigned i,
unsigned Opc,
9932 if (AMDGPU::getNamedOperandIdx(
Opc,
OpName) != -1)
9933 ((AMDGPUOperand &)*
Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9935 ((AMDGPUOperand &)*
Operands[i]).addRegOperands(Inst, 1);
9938void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst,
const OperandVector &Operands) {
9941 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9944 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9945 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1);
9947 OptionalImmIndexMap OptIdx;
9948 for (
unsigned i = 5; i < Operands.
size(); ++i) {
9949 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
9950 OptIdx[
Op.getImmTy()] = i;
9955 AMDGPUOperand::ImmTyIndexKey8bit);
9959 AMDGPUOperand::ImmTyIndexKey16bit);
9963 AMDGPUOperand::ImmTyIndexKey32bit);
9968 cvtVOP3P(Inst, Operands, OptIdx);
9975ParseStatus AMDGPUAsmParser::parseVOPD(
OperandVector &Operands) {
9983 Operands.
push_back(AMDGPUOperand::CreateToken(
this,
"::", S));
9984 SMLoc OpYLoc = getLoc();
9987 Operands.
push_back(AMDGPUOperand::CreateToken(
this, OpYName, OpYLoc));
9990 return Error(OpYLoc,
"expected a VOPDY instruction after ::");
9996void AMDGPUAsmParser::cvtVOPD(MCInst &Inst,
const OperandVector &Operands) {
9999 auto addOp = [&](uint16_t ParsedOprIdx) {
10000 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
10002 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10006 Op.addRegOperands(Inst, 1);
10010 Op.addImmOperands(Inst, 1);
10022 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
10026 const auto &CInfo = InstInfo[CompIdx];
10027 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
10028 for (
unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
10029 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
10030 if (CInfo.hasSrc2Acc())
10031 addOp(CInfo.getIndexOfDstInParsedOperands());
10035 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::bitop3);
10036 if (BitOp3Idx != -1) {
10037 OptionalImmIndexMap OptIdx;
10038 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands.
back());
10040 OptIdx[
Op.getImmTy()] = Operands.
size() - 1;
10050bool AMDGPUOperand::isDPP8()
const {
10051 return isImmTy(ImmTyDPP8);
10054bool AMDGPUOperand::isDPPCtrl()
const {
10055 using namespace AMDGPU::DPP;
10057 bool result = isImm() && getImmTy() == ImmTyDppCtrl &&
isUInt<9>(
getImm());
10060 return (
Imm >= DppCtrl::QUAD_PERM_FIRST &&
Imm <= DppCtrl::QUAD_PERM_LAST) ||
10061 (
Imm >= DppCtrl::ROW_SHL_FIRST &&
Imm <= DppCtrl::ROW_SHL_LAST) ||
10062 (
Imm >= DppCtrl::ROW_SHR_FIRST &&
Imm <= DppCtrl::ROW_SHR_LAST) ||
10063 (
Imm >= DppCtrl::ROW_ROR_FIRST &&
Imm <= DppCtrl::ROW_ROR_LAST) ||
10064 (
Imm == DppCtrl::WAVE_SHL1) ||
10065 (
Imm == DppCtrl::WAVE_ROL1) ||
10066 (
Imm == DppCtrl::WAVE_SHR1) ||
10067 (
Imm == DppCtrl::WAVE_ROR1) ||
10068 (
Imm == DppCtrl::ROW_MIRROR) ||
10069 (
Imm == DppCtrl::ROW_HALF_MIRROR) ||
10070 (
Imm == DppCtrl::BCAST15) ||
10071 (
Imm == DppCtrl::BCAST31) ||
10072 (
Imm >= DppCtrl::ROW_SHARE_FIRST &&
Imm <= DppCtrl::ROW_SHARE_LAST) ||
10073 (
Imm >= DppCtrl::ROW_XMASK_FIRST &&
Imm <= DppCtrl::ROW_XMASK_LAST);
10082bool AMDGPUOperand::isBLGP()
const {
10086bool AMDGPUOperand::isS16Imm()
const {
10090bool AMDGPUOperand::isU16Imm()
const {
10098bool AMDGPUAsmParser::parseDimId(
unsigned &Encoding) {
10103 SMLoc Loc =
getToken().getEndLoc();
10104 Token = std::string(getTokenStr());
10106 if (getLoc() != Loc)
10111 if (!parseId(Suffix))
10115 StringRef DimId = Token;
10126ParseStatus AMDGPUAsmParser::parseDim(
OperandVector &Operands) {
10130 SMLoc S = getLoc();
10136 SMLoc Loc = getLoc();
10137 if (!parseDimId(Encoding))
10138 return Error(Loc,
"invalid dim value");
10140 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Encoding, S,
10141 AMDGPUOperand::ImmTyDim));
10149ParseStatus AMDGPUAsmParser::parseDPP8(
OperandVector &Operands) {
10150 SMLoc S = getLoc();
10159 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
10162 for (
size_t i = 0; i < 8; ++i) {
10166 SMLoc Loc = getLoc();
10167 if (getParser().parseAbsoluteExpression(Sels[i]))
10169 if (0 > Sels[i] || 7 < Sels[i])
10170 return Error(Loc,
"expected a 3-bit value");
10173 if (!skipToken(
AsmToken::RBrac,
"expected a closing square bracket"))
10177 for (
size_t i = 0; i < 8; ++i)
10178 DPP8 |= (Sels[i] << (i * 3));
10180 Operands.
push_back(AMDGPUOperand::CreateImm(
this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10185AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10187 if (Ctrl ==
"row_newbcast")
10190 if (Ctrl ==
"row_share" ||
10191 Ctrl ==
"row_xmask")
10194 if (Ctrl ==
"wave_shl" ||
10195 Ctrl ==
"wave_shr" ||
10196 Ctrl ==
"wave_rol" ||
10197 Ctrl ==
"wave_ror" ||
10198 Ctrl ==
"row_bcast")
10201 return Ctrl ==
"row_mirror" ||
10202 Ctrl ==
"row_half_mirror" ||
10203 Ctrl ==
"quad_perm" ||
10204 Ctrl ==
"row_shl" ||
10205 Ctrl ==
"row_shr" ||
10210AMDGPUAsmParser::parseDPPCtrlPerm() {
10213 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
10217 for (
int i = 0; i < 4; ++i) {
10222 SMLoc Loc = getLoc();
10223 if (getParser().parseAbsoluteExpression(Temp))
10225 if (Temp < 0 || Temp > 3) {
10226 Error(Loc,
"expected a 2-bit value");
10230 Val += (Temp << i * 2);
10233 if (!skipToken(
AsmToken::RBrac,
"expected a closing square bracket"))
10240AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10241 using namespace AMDGPU::DPP;
10246 SMLoc Loc = getLoc();
10248 if (getParser().parseAbsoluteExpression(Val))
10251 struct DppCtrlCheck {
10257 DppCtrlCheck
Check = StringSwitch<DppCtrlCheck>(Ctrl)
10258 .Case(
"wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10259 .Case(
"wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10260 .Case(
"wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10261 .Case(
"wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10262 .Case(
"row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10263 .Case(
"row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10264 .Case(
"row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10265 .Case(
"row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10266 .Case(
"row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10267 .Case(
"row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10271 if (
Check.Ctrl == -1) {
10272 Valid = (
Ctrl ==
"row_bcast" && (Val == 15 || Val == 31));
10280 Error(Loc, Twine(
"invalid ", Ctrl) + Twine(
" value"));
10287ParseStatus AMDGPUAsmParser::parseDPPCtrl(
OperandVector &Operands) {
10288 using namespace AMDGPU::DPP;
10291 !isSupportedDPPCtrl(getTokenStr(), Operands))
10294 SMLoc S = getLoc();
10300 if (Ctrl ==
"row_mirror") {
10301 Val = DppCtrl::ROW_MIRROR;
10302 }
else if (Ctrl ==
"row_half_mirror") {
10303 Val = DppCtrl::ROW_HALF_MIRROR;
10306 if (Ctrl ==
"quad_perm") {
10307 Val = parseDPPCtrlPerm();
10309 Val = parseDPPCtrlSel(Ctrl);
10318 AMDGPUOperand::CreateImm(
this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10322void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst,
const OperandVector &Operands,
10324 OptionalImmIndexMap OptionalIdx;
10331 int OldIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::old);
10333 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers);
10334 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10338 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10339 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10343 int VdstInIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
10344 bool IsVOP3CvtSrDpp =
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10345 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx13 ||
10346 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10347 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx13 ||
10348 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10349 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx13 ||
10350 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
10351 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx13;
10353 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10357 if (OldIdx == NumOperands) {
10359 constexpr int DST_IDX = 0;
10361 }
else if (Src2ModIdx == NumOperands) {
10371 if (IsVOP3CvtSrDpp) {
10380 if (TiedTo != -1) {
10385 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10387 if (IsDPP8 &&
Op.isDppFI()) {
10390 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10391 }
else if (
Op.isReg()) {
10392 Op.addRegOperands(Inst, 1);
10393 }
else if (
Op.isImm() &&
10395 Op.addImmOperands(Inst, 1);
10396 }
else if (
Op.isImm()) {
10397 OptionalIdx[
Op.getImmTy()] =
I;
10405 AMDGPUOperand::ImmTyClamp);
10411 AMDGPUOperand::ImmTyByteSel);
10418 cvtVOP3P(Inst, Operands, OptionalIdx);
10420 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10427 using namespace llvm::AMDGPU::DPP;
10437 AMDGPUOperand::ImmTyDppFI);
10441void AMDGPUAsmParser::cvtDPP(MCInst &Inst,
const OperandVector &Operands,
bool IsDPP8) {
10442 OptionalImmIndexMap OptionalIdx;
10446 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10447 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10451 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10454 if (TiedTo != -1) {
10459 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10461 if (
Op.isReg() && validateVccOperand(
Op.getReg())) {
10469 Op.addImmOperands(Inst, 1);
10471 Op.addRegWithFPInputModsOperands(Inst, 2);
10472 }
else if (
Op.isDppFI()) {
10474 }
else if (
Op.isReg()) {
10475 Op.addRegOperands(Inst, 1);
10481 Op.addRegWithFPInputModsOperands(Inst, 2);
10482 }
else if (
Op.isReg()) {
10483 Op.addRegOperands(Inst, 1);
10484 }
else if (
Op.isDPPCtrl()) {
10485 Op.addImmOperands(Inst, 1);
10486 }
else if (
Op.isImm()) {
10488 OptionalIdx[
Op.getImmTy()] =
I;
10496 using namespace llvm::AMDGPU::DPP;
10504 AMDGPUOperand::ImmTyDppFI);
10513ParseStatus AMDGPUAsmParser::parseSDWASel(
OperandVector &Operands,
10515 AMDGPUOperand::ImmTy
Type) {
10516 return parseStringOrIntWithPrefix(
10518 {
"BYTE_0",
"BYTE_1",
"BYTE_2",
"BYTE_3",
"WORD_0",
"WORD_1",
"DWORD"},
10522ParseStatus AMDGPUAsmParser::parseSDWADstUnused(
OperandVector &Operands) {
10523 return parseStringOrIntWithPrefix(
10524 Operands,
"dst_unused", {
"UNUSED_PAD",
"UNUSED_SEXT",
"UNUSED_PRESERVE"},
10525 AMDGPUOperand::ImmTySDWADstUnused);
10528void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst,
const OperandVector &Operands) {
10529 cvtSDWA(Inst, Operands, SDWAInstType::VOP1);
10532void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst,
const OperandVector &Operands) {
10533 cvtSDWA(Inst, Operands, SDWAInstType::VOP2);
10536void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst,
const OperandVector &Operands) {
10537 cvtSDWA(Inst, Operands, SDWAInstType::VOP2,
true,
true);
10540void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst,
const OperandVector &Operands) {
10541 cvtSDWA(Inst, Operands, SDWAInstType::VOP2,
false,
true);
10544void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst,
const OperandVector &Operands) {
10545 cvtSDWA(Inst, Operands, SDWAInstType::VOPC,
isVI());
10548void AMDGPUAsmParser::cvtSDWA(MCInst &Inst,
const OperandVector &Operands,
10549 SDWAInstType BasicInstType,
bool SkipDstVcc,
10551 using namespace llvm::AMDGPU::SDWA;
10553 OptionalImmIndexMap OptionalIdx;
10554 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10555 bool SkippedVcc =
false;
10559 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10560 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10563 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10564 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10565 if (SkipVcc && !SkippedVcc &&
Op.isReg() &&
10566 (
Op.getReg() == AMDGPU::VCC ||
Op.getReg() == AMDGPU::VCC_LO)) {
10572 if (BasicInstType == SDWAInstType::VOP2 &&
10578 if (BasicInstType == SDWAInstType::VOPC && Inst.
getNumOperands() == 0) {
10584 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10585 }
else if (
Op.isImm()) {
10587 OptionalIdx[
Op.getImmTy()] =
I;
10591 SkippedVcc =
false;
10595 if (
Opc != AMDGPU::V_NOP_sdwa_gfx10 &&
Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10596 Opc != AMDGPU::V_NOP_sdwa_vi) {
10598 switch (BasicInstType) {
10599 case SDWAInstType::VOP1:
10602 AMDGPUOperand::ImmTyClamp, 0);
10606 AMDGPUOperand::ImmTyOModSI, 0);
10610 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10614 AMDGPUOperand::ImmTySDWADstUnused,
10615 DstUnused::UNUSED_PRESERVE);
10617 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10620 case SDWAInstType::VOP2:
10622 AMDGPUOperand::ImmTyClamp, 0);
10627 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10628 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10629 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10630 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10633 case SDWAInstType::VOPC:
10636 AMDGPUOperand::ImmTyClamp, 0);
10637 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10638 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10645 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10646 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10647 auto *it = Inst.
begin();
10649 it, AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::src2));
10661#define GET_MATCHER_IMPLEMENTATION
10662#define GET_MNEMONIC_SPELL_CHECKER
10663#define GET_MNEMONIC_CHECKER
10664#include "AMDGPUGenAsmMatcher.inc"
10670 return parseTokenOp(
"addr64",
Operands);
10672 return parseNamedBit(
"done",
Operands, AMDGPUOperand::ImmTyDone,
true);
10674 return parseTokenOp(
"idxen",
Operands);
10676 return parseNamedBit(
"lds",
Operands, AMDGPUOperand::ImmTyLDS,
10679 return parseTokenOp(
"offen",
Operands);
10681 return parseTokenOp(
"off",
Operands);
10682 case MCK_row_95_en:
10683 return parseNamedBit(
"row_en",
Operands, AMDGPUOperand::ImmTyRowEn,
true);
10685 return parseNamedBit(
"gds",
Operands, AMDGPUOperand::ImmTyGDS);
10687 return parseNamedBit(
"tfe",
Operands, AMDGPUOperand::ImmTyTFE);
10689 return tryCustomParseOperand(
Operands, MCK);
10694unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &
Op,
10700 AMDGPUOperand &Operand = (AMDGPUOperand&)
Op;
10703 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10705 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10707 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10709 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10711 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10713 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10715 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10716 case MCK_row_95_en:
10717 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10725 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10727 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10728 case MCK_SOPPBrTarget:
10729 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10730 case MCK_VReg32OrOff:
10731 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10732 case MCK_InterpSlot:
10733 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10734 case MCK_InterpAttr:
10735 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10736 case MCK_InterpAttrChan:
10737 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10739 case MCK_SReg_64_XEXEC:
10749 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10751 return Match_InvalidOperand;
10759ParseStatus AMDGPUAsmParser::parseEndpgm(
OperandVector &Operands) {
10760 SMLoc S = getLoc();
10769 return Error(S,
"expected a 16-bit value");
10772 AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTyEndpgm));
/// Operand predicate for the Endpgm immediate.
///
/// \returns the result of isImmTy(ImmTyEndpgm). parseEndpgm creates its
/// operand with AMDGPUOperand::ImmTyEndpgm, so this predicate is expected to
/// match exactly the operands produced by that parser.
10776bool AMDGPUOperand::isEndpgm()
const {
return isImmTy(ImmTyEndpgm); }
10782bool AMDGPUOperand::isSplitBarrier()
const {
return isInlinableImm(MVT::i32); }
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
Enums shared between the AMDGPU backend (LLVM) and the ELF linker (LLD) for the .amdgpu....
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
static llvm::Expected< InlineInfo > decode(GsymDataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Loop::LoopBounds::Direction Direction
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
static const fltSemantics & BFloat()
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
opStatus
IEEE-754R 7: Default exception handling.
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
iterator insert(iterator I, const MCOperand &Op)
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Instances of this class represent operands of the MCInst class.
static MCOperand createExpr(const MCExpr *Val)
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
void setReg(MCRegister Reg)
Set the register number.
MCRegister getReg() const
Returns the register number.
const MCExpr * getExpr() const
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
constexpr bool isValid() const
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
bool isVariable() const
isVariable - Check if this is a variable symbol.
LLVM_ABI void setVariableValue(const MCExpr *Value)
void setRedefinable(bool Value)
Mark this symbol as redefinable.
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
MCTargetAsmParser - Generic interface to target specific assembly parsers.
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
constexpr const char * getPointer() const
constexpr bool isValid() const
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
Get the string size.
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
bool contains(StringRef key) const
Check if the set contains the given key.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
std::pair< iterator, bool > insert(const ValueT &V)
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
FuncInfoFlags
Per-function flags packed into INFO_FLAGS entries.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ UNDEF
UNDEF - An undefined node.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
Context & getContext() const
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
FunctionAddr VTableAddr Value
StringMapEntry< Value * > ValueName
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
void PrintError(const Twine &Msg)
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
@ Default
The result value is uniform if and only if all operands are uniform.
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
uint32_t PrivateSegmentSize
SmallVector< std::pair< MCSymbol *, std::string >, 4 > IndirectCalls
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 8 > Calls
SmallVector< FuncInfo, 8 > Funcs
SmallVector< std::pair< MCSymbol *, std::string >, 4 > TypeIds
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 4 > Uses
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
const MCExpr * compute_pgm_rsrc1
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * group_segment_fixed_size
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * kernel_code_properties
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
uint32_t group_segment_fixed_size
uint32_t private_segment_fixed_size