/// Tag naming the kind of register being described. It is passed as the
/// RegKind argument to the register-parsing and register-usage routines
/// declared further down in this file. IS_UNKNOWN is the first enumerator.
57enum RegisterKind { IS_UNKNOWN,
IS_VGPR, IS_SGPR,
IS_AGPR, IS_TTMP, IS_SPECIAL };
71 SMLoc StartLoc, EndLoc;
72 const AMDGPUAsmParser *AsmParser;
/// Records the operand kind and the parser pointer. No other member is
/// initialised by this constructor.
75 AMDGPUOperand(KindTy Kind_,
const AMDGPUAsmParser *AsmParser_)
76 : Kind(Kind_), AsmParser(AsmParser_) {}
/// Owning pointer to an AMDGPUOperand; the return type of the Create*
/// factory functions.
78 using Ptr = std::unique_ptr<AMDGPUOperand>;
86 bool hasFPModifiers()
const {
return Abs || Neg; }
/// Reports whether the Sext flag is set.
87 bool hasIntModifiers()
const {
return Sext; }
88 bool hasModifiers()
const {
return hasFPModifiers() || hasIntModifiers(); }
/// True when the Lit field equals LitModifier::Lit.
89 bool isForcedLit()
const {
return Lit == LitModifier::Lit; }
/// True when the Lit field equals LitModifier::Lit64.
90 bool isForcedLit64()
const {
return Lit == LitModifier::Lit64; }
92 int64_t getFPModifiersOperand()
const {
99 int64_t getIntModifiersOperand()
const {
105 int64_t getModifiersOperand()
const {
106 assert(!(hasFPModifiers() && hasIntModifiers())
107 &&
"fp and int modifiers should not be used simultaneously");
108 if (hasFPModifiers())
109 return getFPModifiersOperand();
110 if (hasIntModifiers())
111 return getIntModifiersOperand();
115 friend raw_ostream &
operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
189 ImmTyMatrixAScaleFmt,
190 ImmTyMatrixBScaleFmt,
223 mutable int MCOpIdx = -1;
/// Override: true when this operand's Kind is Token.
226 bool isToken()
const override {
return Kind == Token; }
228 bool isSymbolRefExpr()
const {
232 bool isImm()
const override {
233 return Kind == Immediate;
236 bool isInlinableImm(MVT type)
const;
237 bool isLiteralImm(MVT type)
const;
239 bool isRegKind()
const {
240 return Kind == Register;
243 bool isReg()
const override {
244 return isRegKind() && !hasModifiers();
247 bool isRegOrInline(
unsigned RCID, MVT type)
const {
248 return isRegClass(RCID) || isInlinableImm(type);
252 return isRegOrInline(RCID, type) || isLiteralImm(type);
255 bool isRegOrImmWithInt16InputMods()
const {
259 template <
bool IsFake16>
bool isRegOrImmWithIntT16InputMods()
const {
261 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
264 bool isRegOrImmWithInt32InputMods()
const {
268 bool isRegOrInlineImmWithInt16InputMods()
const {
269 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
272 template <
bool IsFake16>
bool isRegOrInlineImmWithIntT16InputMods()
const {
273 return isRegOrInline(
274 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
277 bool isRegOrInlineImmWithInt32InputMods()
const {
278 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
281 bool isRegOrImmWithInt64InputMods()
const {
285 bool isRegOrImmWithFP16InputMods()
const {
289 template <
bool IsFake16>
bool isRegOrImmWithFPT16InputMods()
const {
291 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
294 bool isRegOrImmWithFP32InputMods()
const {
298 bool isRegOrImmWithFP64InputMods()
const {
302 template <
bool IsFake16>
bool isRegOrInlineImmWithFP16InputMods()
const {
303 return isRegOrInline(
304 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
307 bool isRegOrInlineImmWithFP32InputMods()
const {
308 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
311 bool isRegOrInlineImmWithFP64InputMods()
const {
312 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
/// Forwards to isRegClass(RCID); modifiers are not examined in this overload.
/// \param RCID register class ID handed unchanged to isRegClass.
315 bool isVRegWithInputMods(
unsigned RCID)
const {
return isRegClass(RCID); }
317 bool isVRegWithFP32InputMods()
const {
318 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
321 bool isVRegWithFP64InputMods()
const {
322 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
325 bool isPackedFP16InputMods()
const {
329 bool isPackedVGPRFP32InputMods()
const {
333 bool isVReg()
const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
345 bool isVReg32()
const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
349 bool isVReg32OrOff()
const {
350 return isOff() || isVReg32();
354 return isRegKind() &&
getReg() == AMDGPU::SGPR_NULL;
357 bool isAV_LdSt_32_Align2_RegOp()
const {
358 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
359 isRegClass(AMDGPU::AGPR_32RegClassID);
362 bool isVRegWithInputMods()
const;
363 template <
bool IsFake16>
bool isT16_Lo128VRegWithInputMods()
const;
364 template <
bool IsFake16>
bool isT16VRegWithInputMods()
const;
366 bool isSDWAOperand(MVT type)
const;
367 bool isSDWAFP16Operand()
const;
368 bool isSDWAFP32Operand()
const;
369 bool isSDWAInt16Operand()
const;
370 bool isSDWAInt32Operand()
const;
372 bool isImmTy(ImmTy ImmT)
const {
373 return isImm() &&
Imm.Type == ImmT;
/// Template form of isImmTy(ImmTy): the tag is supplied as the template
/// argument Ty and forwarded to the runtime overload.
376 template <ImmTy Ty>
bool isImmTy()
const {
return isImmTy(Ty); }
/// True when isImmTy(ImmTyNone) holds, i.e. an immediate with no specific tag.
378 bool isImmLiteral()
const {
return isImmTy(ImmTyNone); }
380 bool isImmModifier()
const {
381 return isImm() &&
Imm.Type != ImmTyNone;
/// True when this operand is an immediate tagged ImmTyOModSI.
384 bool isOModSI()
const {
return isImmTy(ImmTyOModSI); }
/// True when this operand is an immediate tagged ImmTyDim.
385 bool isDim()
const {
return isImmTy(ImmTyDim); }
/// True when this operand is an immediate tagged ImmTyR128A16.
386 bool isR128A16()
const {
return isImmTy(ImmTyR128A16); }
/// True when this operand is an immediate tagged ImmTyOff.
387 bool isOff()
const {
return isImmTy(ImmTyOff); }
/// True when this operand is an immediate tagged ImmTyExpTgt.
388 bool isExpTgt()
const {
return isImmTy(ImmTyExpTgt); }
/// True when this operand is an immediate tagged ImmTyOffen.
389 bool isOffen()
const {
return isImmTy(ImmTyOffen); }
/// True when this operand is an immediate tagged ImmTyIdxen.
390 bool isIdxen()
const {
return isImmTy(ImmTyIdxen); }
/// True when this operand is an immediate tagged ImmTyAddr64.
391 bool isAddr64()
const {
return isImmTy(ImmTyAddr64); }
/// True when this operand is an immediate tagged ImmTySMEMOffsetMod.
392 bool isSMEMOffsetMod()
const {
return isImmTy(ImmTySMEMOffsetMod); }
393 bool isFlatOffset()
const {
return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
/// True when this operand is an immediate tagged ImmTyGDS.
394 bool isGDS()
const {
return isImmTy(ImmTyGDS); }
/// True when this operand is an immediate tagged ImmTyLDS.
395 bool isLDS()
const {
return isImmTy(ImmTyLDS); }
/// True when this operand is an immediate tagged ImmTyCPol.
396 bool isCPol()
const {
return isImmTy(ImmTyCPol); }
/// True when this operand is an immediate tagged ImmTyIndexKey8bit.
397 bool isIndexKey8bit()
const {
return isImmTy(ImmTyIndexKey8bit); }
/// True when this operand is an immediate tagged ImmTyIndexKey16bit.
398 bool isIndexKey16bit()
const {
return isImmTy(ImmTyIndexKey16bit); }
/// True when this operand is an immediate tagged ImmTyIndexKey32bit.
399 bool isIndexKey32bit()
const {
return isImmTy(ImmTyIndexKey32bit); }
/// True when this operand is an immediate tagged ImmTyMatrixAFMT.
400 bool isMatrixAFMT()
const {
return isImmTy(ImmTyMatrixAFMT); }
/// True when this operand is an immediate tagged ImmTyMatrixBFMT.
401 bool isMatrixBFMT()
const {
return isImmTy(ImmTyMatrixBFMT); }
/// True when this operand is an immediate tagged ImmTyMatrixAScale.
402 bool isMatrixAScale()
const {
return isImmTy(ImmTyMatrixAScale); }
/// True when this operand is an immediate tagged ImmTyMatrixBScale.
403 bool isMatrixBScale()
const {
return isImmTy(ImmTyMatrixBScale); }
/// True when this operand is an immediate tagged ImmTyMatrixAScaleFmt.
404 bool isMatrixAScaleFmt()
const {
return isImmTy(ImmTyMatrixAScaleFmt); }
/// True when this operand is an immediate tagged ImmTyMatrixBScaleFmt.
405 bool isMatrixBScaleFmt()
const {
return isImmTy(ImmTyMatrixBScaleFmt); }
/// True when this operand is an immediate tagged ImmTyMatrixAReuse.
406 bool isMatrixAReuse()
const {
return isImmTy(ImmTyMatrixAReuse); }
/// True when this operand is an immediate tagged ImmTyMatrixBReuse.
407 bool isMatrixBReuse()
const {
return isImmTy(ImmTyMatrixBReuse); }
/// True when this operand is an immediate tagged ImmTyTFE.
408 bool isTFE()
const {
return isImmTy(ImmTyTFE); }
409 bool isFORMAT()
const {
return isImmTy(ImmTyFORMAT) &&
isUInt<7>(
getImm()); }
/// True when this operand is an immediate tagged ImmTyDppFI.
410 bool isDppFI()
const {
return isImmTy(ImmTyDppFI); }
/// True when this operand is an immediate tagged ImmTySDWADstSel.
411 bool isSDWADstSel()
const {
return isImmTy(ImmTySDWADstSel); }
/// True when this operand is an immediate tagged ImmTySDWASrc0Sel.
412 bool isSDWASrc0Sel()
const {
return isImmTy(ImmTySDWASrc0Sel); }
/// True when this operand is an immediate tagged ImmTySDWASrc1Sel.
413 bool isSDWASrc1Sel()
const {
return isImmTy(ImmTySDWASrc1Sel); }
/// True when this operand is an immediate tagged ImmTySDWADstUnused.
414 bool isSDWADstUnused()
const {
return isImmTy(ImmTySDWADstUnused); }
/// True when this operand is an immediate tagged ImmTyInterpSlot.
415 bool isInterpSlot()
const {
return isImmTy(ImmTyInterpSlot); }
/// True when this operand is an immediate tagged ImmTyInterpAttr.
416 bool isInterpAttr()
const {
return isImmTy(ImmTyInterpAttr); }
/// True when this operand is an immediate tagged ImmTyInterpAttrChan.
417 bool isInterpAttrChan()
const {
return isImmTy(ImmTyInterpAttrChan); }
/// True when this operand is an immediate tagged ImmTyOpSel.
418 bool isOpSel()
const {
return isImmTy(ImmTyOpSel); }
/// True when this operand is an immediate tagged ImmTyOpSelHi.
419 bool isOpSelHi()
const {
return isImmTy(ImmTyOpSelHi); }
/// True when this operand is an immediate tagged ImmTyNegLo.
420 bool isNegLo()
const {
return isImmTy(ImmTyNegLo); }
/// True when this operand is an immediate tagged ImmTyNegHi.
421 bool isNegHi()
const {
return isImmTy(ImmTyNegHi); }
422 bool isBitOp3()
const {
return isImmTy(ImmTyBitOp3) &&
isUInt<8>(
getImm()); }
/// True when this operand is an immediate tagged ImmTyDone.
423 bool isDone()
const {
return isImmTy(ImmTyDone); }
/// True when this operand is an immediate tagged ImmTyRowEn.
424 bool isRowEn()
const {
return isImmTy(ImmTyRowEn); }
426 bool isRegOrImm()
const {
427 return isReg() || isImm();
430 bool isRegClass(
unsigned RCID)
const;
434 bool isRegOrInlineNoMods(
unsigned RCID, MVT type)
const {
435 return isRegOrInline(RCID, type) && !hasModifiers();
438 bool isSCSrcB16()
const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
442 bool isSCSrcV2B16()
const {
446 bool isSCSrc_b32()
const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
450 bool isSCSrc_b64()
const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
454 bool isBoolReg()
const;
456 bool isSCSrcF16()
const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
460 bool isSCSrcV2F16()
const {
464 bool isSCSrcF32()
const {
465 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
468 bool isSCSrcF64()
const {
469 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
472 bool isSSrc_b32()
const {
473 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
476 bool isSSrc_b16()
const {
return isSCSrcB16() || isLiteralImm(MVT::i16); }
478 bool isSSrcV2B16()
const {
483 bool isSSrc_b64()
const {
486 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
487 (((
const MCTargetAsmParser *)AsmParser)
488 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
492 bool isSSrc_f32()
const {
493 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
496 bool isSSrcF64()
const {
return isSCSrc_b64() || isLiteralImm(MVT::f64); }
498 bool isSSrc_bf16()
const {
return isSCSrcB16() || isLiteralImm(MVT::bf16); }
500 bool isSSrc_f16()
const {
return isSCSrcB16() || isLiteralImm(MVT::f16); }
502 bool isSSrcV2F16()
const {
507 bool isSSrcV2FP32()
const {
512 bool isSCSrcV2FP32()
const {
517 bool isSSrcV2INT32()
const {
522 bool isSCSrcV2INT32()
const {
524 return isSCSrc_b32();
527 bool isSSrcOrLds_b32()
const {
528 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
529 isLiteralImm(MVT::i32) || isExpr();
532 bool isVCSrc_b32()
const {
533 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
536 bool isVCSrc_b32_Lo256()
const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
540 bool isVCSrc_b64_Lo256()
const {
541 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
544 bool isVCSrc_b64()
const {
545 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
548 bool isVCSrcT_b16()
const {
549 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
552 bool isVCSrcTB16_Lo128()
const {
553 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
556 bool isVCSrcFake16B16_Lo128()
const {
557 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
560 bool isVCSrc_b16()
const {
561 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
/// Same check as isVCSrc_b16(); this predicate simply forwards to it.
564 bool isVCSrc_v2b16()
const {
return isVCSrc_b16(); }
566 bool isVCSrc_f32()
const {
567 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
570 bool isVCSrc_f64()
const {
571 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
574 bool isVCSrcTBF16()
const {
575 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
578 bool isVCSrcT_f16()
const {
579 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
582 bool isVCSrcT_bf16()
const {
583 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
586 bool isVCSrcTBF16_Lo128()
const {
587 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
590 bool isVCSrcTF16_Lo128()
const {
591 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
594 bool isVCSrcFake16BF16_Lo128()
const {
595 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
598 bool isVCSrcFake16F16_Lo128()
const {
599 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
602 bool isVCSrc_bf16()
const {
603 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
606 bool isVCSrc_f16()
const {
607 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
/// Same check as isVCSrc_bf16(); this predicate simply forwards to it.
610 bool isVCSrc_v2bf16()
const {
return isVCSrc_bf16(); }
/// Same check as isVCSrc_f16(); this predicate simply forwards to it.
612 bool isVCSrc_v2f16()
const {
return isVCSrc_f16(); }
614 bool isVSrc_b32()
const {
615 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
618 bool isVSrc_b64()
const {
return isVCSrc_f64() || isLiteralImm(MVT::i64); }
620 bool isVSrc_v2b64()
const {
621 return isRegOrInlineNoMods(AMDGPU::VS_128RegClassID, MVT::i64) ||
622 isLiteralImm(MVT::i64);
625 bool isVSrc_v2f64()
const {
626 return isRegOrInlineNoMods(AMDGPU::VS_128RegClassID, MVT::f64) ||
627 isLiteralImm(MVT::f64);
630 bool isVSrcT_b16()
const {
return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
632 bool isVSrcT_b16_Lo128()
const {
633 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
636 bool isVSrcFake16_b16_Lo128()
const {
637 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
640 bool isVSrc_b16()
const {
return isVCSrc_b16() || isLiteralImm(MVT::i16); }
642 bool isVSrc_v2b16()
const {
return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
/// Same check as isVCSrc_f64(); this predicate simply forwards to it.
644 bool isVCSrcV2FP32()
const {
return isVCSrc_f64(); }
646 bool isVSrc_v2f32()
const {
return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
/// Same check as isVCSrc_b64(); this predicate simply forwards to it.
648 bool isVCSrc_v2b32()
const {
return isVCSrc_b64(); }
650 bool isVSrc_v2b32()
const {
return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
652 bool isVSrc_f32()
const {
653 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
656 bool isVSrc_f64()
const {
return isVCSrc_f64() || isLiteralImm(MVT::f64); }
658 bool isVSrcT_bf16()
const {
return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
660 bool isVSrcT_f16()
const {
return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
662 bool isVSrcT_bf16_Lo128()
const {
663 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
666 bool isVSrcT_f16_Lo128()
const {
667 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
670 bool isVSrcFake16_bf16_Lo128()
const {
671 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
674 bool isVSrcFake16_f16_Lo128()
const {
675 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
678 bool isVSrc_bf16()
const {
return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
680 bool isVSrc_f16()
const {
return isVCSrc_f16() || isLiteralImm(MVT::f16); }
682 bool isVSrc_v2bf16()
const {
683 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
686 bool isVSrc_v2f16()
const {
return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
/// Same check as isVSrc_v2f16(); this predicate simply forwards to it.
688 bool isVSrc_v2f16_splat()
const {
return isVSrc_v2f16(); }
/// Same check as isVSrc_v2f16(); this predicate simply forwards to it and
/// applies no additional restriction of its own.
690 bool isVSrc_NoInline_v2f16()
const {
return isVSrc_v2f16(); }
692 bool isVISrcB32()
const {
693 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
696 bool isVISrcB16()
const {
697 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
700 bool isVISrcV2B16()
const {
704 bool isVISrcF32()
const {
705 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
708 bool isVISrcF16()
const {
709 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
712 bool isVISrcV2F16()
const {
713 return isVISrcF16() || isVISrcB32();
716 bool isVISrc_64_bf16()
const {
717 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
720 bool isVISrc_64_f16()
const {
721 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
724 bool isVISrc_64_b32()
const {
725 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
728 bool isVISrc_64B64()
const {
729 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
732 bool isVISrc_64_f64()
const {
733 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
736 bool isVISrc_64V2FP32()
const {
737 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
740 bool isVISrc_64V2INT32()
const {
741 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
744 bool isVISrc_256_b32()
const {
745 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
748 bool isVISrc_256_f32()
const {
749 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
752 bool isVISrc_256B64()
const {
753 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
756 bool isVISrc_256_f64()
const {
757 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
760 bool isVISrc_512_f64()
const {
761 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
764 bool isVISrc_128B16()
const {
765 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
768 bool isVISrc_128V2B16()
const {
769 return isVISrc_128B16();
772 bool isVISrc_128_b32()
const {
773 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
776 bool isVISrc_128_f32()
const {
777 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
780 bool isVISrc_256V2FP32()
const {
781 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
784 bool isVISrc_256V2INT32()
const {
785 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
788 bool isVISrc_512_b32()
const {
789 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
792 bool isVISrc_512B16()
const {
793 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
796 bool isVISrc_512V2B16()
const {
797 return isVISrc_512B16();
800 bool isVISrc_512_f32()
const {
801 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
804 bool isVISrc_512F16()
const {
805 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
808 bool isVISrc_512V2F16()
const {
809 return isVISrc_512F16() || isVISrc_512_b32();
812 bool isVISrc_1024_b32()
const {
813 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
816 bool isVISrc_1024B16()
const {
817 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
820 bool isVISrc_1024V2B16()
const {
821 return isVISrc_1024B16();
824 bool isVISrc_1024_f32()
const {
825 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
828 bool isVISrc_1024F16()
const {
829 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
832 bool isVISrc_1024V2F16()
const {
833 return isVISrc_1024F16() || isVISrc_1024_b32();
836 bool isAISrcB32()
const {
837 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
840 bool isAISrcB16()
const {
841 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
844 bool isAISrcV2B16()
const {
848 bool isAISrcF32()
const {
849 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
852 bool isAISrcF16()
const {
853 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
856 bool isAISrcV2F16()
const {
857 return isAISrcF16() || isAISrcB32();
860 bool isAISrc_64B64()
const {
861 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
864 bool isAISrc_64_f64()
const {
865 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
868 bool isAISrc_128_b32()
const {
869 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
872 bool isAISrc_128B16()
const {
873 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
876 bool isAISrc_128V2B16()
const {
877 return isAISrc_128B16();
880 bool isAISrc_128_f32()
const {
881 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
884 bool isAISrc_128F16()
const {
885 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
888 bool isAISrc_128V2F16()
const {
889 return isAISrc_128F16() || isAISrc_128_b32();
892 bool isVISrc_128_bf16()
const {
893 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
896 bool isVISrc_128_f16()
const {
897 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
900 bool isVISrc_128V2F16()
const {
901 return isVISrc_128_f16() || isVISrc_128_b32();
904 bool isAISrc_256B64()
const {
905 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
908 bool isAISrc_256_f64()
const {
909 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
912 bool isAISrc_512_b32()
const {
913 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
916 bool isAISrc_512B16()
const {
917 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
920 bool isAISrc_512V2B16()
const {
921 return isAISrc_512B16();
924 bool isAISrc_512_f32()
const {
925 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
928 bool isAISrc_512F16()
const {
929 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
932 bool isAISrc_512V2F16()
const {
933 return isAISrc_512F16() || isAISrc_512_b32();
936 bool isAISrc_1024_b32()
const {
937 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
940 bool isAISrc_1024B16()
const {
941 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
944 bool isAISrc_1024V2B16()
const {
945 return isAISrc_1024B16();
948 bool isAISrc_1024_f32()
const {
949 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
952 bool isAISrc_1024F16()
const {
953 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
956 bool isAISrc_1024V2F16()
const {
957 return isAISrc_1024F16() || isAISrc_1024_b32();
960 bool isKImmFP32()
const {
961 return isLiteralImm(MVT::f32);
964 bool isKImmFP16()
const {
965 return isLiteralImm(MVT::f16);
/// True when isLiteralImm(MVT::f64) holds for this operand.
968 bool isKImmFP64()
const {
return isLiteralImm(MVT::f64); }
970 bool isMem()
const override {
974 bool isExpr()
const {
975 return Kind == Expression;
978 bool isSOPPBrTarget()
const {
return isExpr() || isImm(); }
980 bool isSWaitCnt()
const;
981 bool isDepCtr()
const;
982 bool isSDelayALU()
const;
983 bool isHwreg()
const;
984 bool isSendMsg()
const;
985 bool isWaitEvent()
const;
986 bool isSplitBarrier()
const;
987 bool isSwizzle()
const;
988 bool isSMRDOffset8()
const;
989 bool isSMEMOffset()
const;
990 bool isSMRDLiteralOffset()
const;
992 bool isDPPCtrl()
const;
994 bool isGPRIdxMode()
const;
995 bool isS16Imm()
const;
996 bool isU16Imm()
const;
997 bool isEndpgm()
const;
999 auto getPredicate(std::function<
bool(
const AMDGPUOperand &
Op)>
P)
const {
1000 return [
this,
P]() {
return P(*
this); };
1005 return StringRef(Tok.Data, Tok.Length);
1013 void setImm(int64_t Val) {
1018 ImmTy getImmTy()
const {
1023 MCRegister
getReg()
const override {
1028 SMLoc getStartLoc()
const override {
1032 SMLoc getEndLoc()
const override {
1036 SMRange getLocRange()
const {
1037 return SMRange(StartLoc, EndLoc);
/// Returns the stored MCOpIdx member, whose in-class initialiser is -1.
1040 int getMCOpIdx()
const {
return MCOpIdx; }
1042 Modifiers getModifiers()
const {
1043 assert(isRegKind() || isImmTy(ImmTyNone));
1044 return isRegKind() ?
Reg.Mods :
Imm.Mods;
1047 void setModifiers(Modifiers Mods) {
1048 assert(isRegKind() || isImmTy(ImmTyNone));
1055 bool hasModifiers()
const {
1056 return getModifiers().hasModifiers();
1059 bool hasFPModifiers()
const {
1060 return getModifiers().hasFPModifiers();
1063 bool hasIntModifiers()
const {
1064 return getModifiers().hasIntModifiers();
1067 bool isForcedLit()
const {
1068 return isImmLiteral() && getModifiers().isForcedLit();
1071 bool isForcedLit64()
const {
1072 return isImmLiteral() && getModifiers().isForcedLit64();
1075 uint64_t applyInputFPModifiers(uint64_t Val,
unsigned Size)
const;
1077 void addImmOperands(MCInst &Inst,
unsigned N,
bool ApplyModifiers =
true)
const;
1079 void addLiteralImmOperand(MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const;
1081 void addRegOperands(MCInst &Inst,
unsigned N)
const;
1083 void addRegOrImmOperands(MCInst &Inst,
unsigned N)
const {
1085 addRegOperands(Inst,
N);
1087 addImmOperands(Inst,
N);
1090 void addRegOrImmWithInputModsOperands(MCInst &Inst,
unsigned N)
const {
1091 Modifiers Mods = getModifiers();
1094 addRegOperands(Inst,
N);
1096 addImmOperands(Inst,
N,
false);
1100 void addRegOrImmWithFPInputModsOperands(MCInst &Inst,
unsigned N)
const {
1101 assert(!hasIntModifiers());
1102 addRegOrImmWithInputModsOperands(Inst,
N);
1105 void addRegOrImmWithIntInputModsOperands(MCInst &Inst,
unsigned N)
const {
1106 assert(!hasFPModifiers());
1107 addRegOrImmWithInputModsOperands(Inst,
N);
1110 void addRegWithInputModsOperands(MCInst &Inst,
unsigned N)
const {
1111 Modifiers Mods = getModifiers();
1114 addRegOperands(Inst,
N);
1117 void addRegWithFPInputModsOperands(MCInst &Inst,
unsigned N)
const {
1118 assert(!hasIntModifiers());
1119 addRegWithInputModsOperands(Inst,
N);
1122 void addRegWithIntInputModsOperands(MCInst &Inst,
unsigned N)
const {
1123 assert(!hasFPModifiers());
1124 addRegWithInputModsOperands(Inst,
N);
1127 static void printImmTy(raw_ostream& OS, ImmTy
Type) {
1130 case ImmTyNone: OS <<
"None";
break;
1131 case ImmTyGDS: OS <<
"GDS";
break;
1132 case ImmTyLDS: OS <<
"LDS";
break;
1133 case ImmTyOffen: OS <<
"Offen";
break;
1134 case ImmTyIdxen: OS <<
"Idxen";
break;
1135 case ImmTyAddr64: OS <<
"Addr64";
break;
1136 case ImmTyOffset: OS <<
"Offset";
break;
1137 case ImmTyInstOffset: OS <<
"InstOffset";
break;
1138 case ImmTyOffset0: OS <<
"Offset0";
break;
1139 case ImmTyOffset1: OS <<
"Offset1";
break;
1140 case ImmTySMEMOffsetMod: OS <<
"SMEMOffsetMod";
break;
1141 case ImmTyCPol: OS <<
"CPol";
break;
1142 case ImmTyIndexKey8bit: OS <<
"index_key";
break;
1143 case ImmTyIndexKey16bit: OS <<
"index_key";
break;
1144 case ImmTyIndexKey32bit: OS <<
"index_key";
break;
1145 case ImmTyTFE: OS <<
"TFE";
break;
1146 case ImmTyIsAsync: OS <<
"IsAsync";
break;
1147 case ImmTyD16: OS <<
"D16";
break;
1148 case ImmTyFORMAT: OS <<
"FORMAT";
break;
1149 case ImmTyClamp: OS <<
"Clamp";
break;
1150 case ImmTyOModSI: OS <<
"OModSI";
break;
1151 case ImmTyDPP8: OS <<
"DPP8";
break;
1152 case ImmTyDppCtrl: OS <<
"DppCtrl";
break;
1153 case ImmTyDppRowMask: OS <<
"DppRowMask";
break;
1154 case ImmTyDppBankMask: OS <<
"DppBankMask";
break;
1155 case ImmTyDppBoundCtrl: OS <<
"DppBoundCtrl";
break;
1156 case ImmTyDppFI: OS <<
"DppFI";
break;
1157 case ImmTySDWADstSel: OS <<
"SDWADstSel";
break;
1158 case ImmTySDWASrc0Sel: OS <<
"SDWASrc0Sel";
break;
1159 case ImmTySDWASrc1Sel: OS <<
"SDWASrc1Sel";
break;
1160 case ImmTySDWADstUnused: OS <<
"SDWADstUnused";
break;
1161 case ImmTyDMask: OS <<
"DMask";
break;
1162 case ImmTyDim: OS <<
"Dim";
break;
1163 case ImmTyUNorm: OS <<
"UNorm";
break;
1164 case ImmTyDA: OS <<
"DA";
break;
1165 case ImmTyR128A16: OS <<
"R128A16";
break;
1166 case ImmTyA16: OS <<
"A16";
break;
1167 case ImmTyLWE: OS <<
"LWE";
break;
1168 case ImmTyOff: OS <<
"Off";
break;
1169 case ImmTyExpTgt: OS <<
"ExpTgt";
break;
1170 case ImmTyExpCompr: OS <<
"ExpCompr";
break;
1171 case ImmTyExpVM: OS <<
"ExpVM";
break;
1172 case ImmTyDone: OS <<
"Done";
break;
1173 case ImmTyRowEn: OS <<
"RowEn";
break;
1174 case ImmTyHwreg: OS <<
"Hwreg";
break;
1175 case ImmTySendMsg: OS <<
"SendMsg";
break;
1176 case ImmTyWaitEvent: OS <<
"WaitEvent";
break;
1177 case ImmTyInterpSlot: OS <<
"InterpSlot";
break;
1178 case ImmTyInterpAttr: OS <<
"InterpAttr";
break;
1179 case ImmTyInterpAttrChan: OS <<
"InterpAttrChan";
break;
1180 case ImmTyOpSel: OS <<
"OpSel";
break;
1181 case ImmTyOpSelHi: OS <<
"OpSelHi";
break;
1182 case ImmTyNegLo: OS <<
"NegLo";
break;
1183 case ImmTyNegHi: OS <<
"NegHi";
break;
1184 case ImmTySwizzle: OS <<
"Swizzle";
break;
1185 case ImmTyGprIdxMode: OS <<
"GprIdxMode";
break;
1186 case ImmTyHigh: OS <<
"High";
break;
1187 case ImmTyBLGP: OS <<
"BLGP";
break;
1188 case ImmTyCBSZ: OS <<
"CBSZ";
break;
1189 case ImmTyABID: OS <<
"ABID";
break;
1190 case ImmTyEndpgm: OS <<
"Endpgm";
break;
1191 case ImmTyWaitVDST: OS <<
"WaitVDST";
break;
1192 case ImmTyWaitEXP: OS <<
"WaitEXP";
break;
1193 case ImmTyWaitVAVDst: OS <<
"WaitVAVDst";
break;
1194 case ImmTyWaitVMVSrc: OS <<
"WaitVMVSrc";
break;
1195 case ImmTyBitOp3: OS <<
"BitOp3";
break;
1196 case ImmTyMatrixAFMT: OS <<
"ImmTyMatrixAFMT";
break;
1197 case ImmTyMatrixBFMT: OS <<
"ImmTyMatrixBFMT";
break;
1198 case ImmTyMatrixAScale: OS <<
"ImmTyMatrixAScale";
break;
1199 case ImmTyMatrixBScale: OS <<
"ImmTyMatrixBScale";
break;
1200 case ImmTyMatrixAScaleFmt: OS <<
"ImmTyMatrixAScaleFmt";
break;
1201 case ImmTyMatrixBScaleFmt: OS <<
"ImmTyMatrixBScaleFmt";
break;
1202 case ImmTyMatrixAReuse: OS <<
"ImmTyMatrixAReuse";
break;
1203 case ImmTyMatrixBReuse: OS <<
"ImmTyMatrixBReuse";
break;
1204 case ImmTyScaleSel: OS <<
"ScaleSel" ;
break;
1205 case ImmTyByteSel: OS <<
"ByteSel" ;
break;
1210 void print(raw_ostream &OS,
const MCAsmInfo &MAI)
const override {
1214 <<
" mods: " <<
Reg.Mods <<
'>';
1218 if (getImmTy() != ImmTyNone) {
1219 OS <<
" type: "; printImmTy(OS, getImmTy());
1221 OS <<
" mods: " <<
Imm.Mods <<
'>';
1234 static AMDGPUOperand::Ptr CreateImm(
const AMDGPUAsmParser *AsmParser,
1235 int64_t Val, SMLoc Loc,
1236 ImmTy
Type = ImmTyNone,
1237 bool IsFPImm =
false) {
1238 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1240 Op->Imm.IsFPImm = IsFPImm;
1242 Op->Imm.Mods = Modifiers();
1248 static AMDGPUOperand::Ptr CreateToken(
const AMDGPUAsmParser *AsmParser,
1249 StringRef Str, SMLoc Loc,
1250 bool HasExplicitEncodingSize =
true) {
1251 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1252 Res->Tok.Data = Str.data();
1253 Res->Tok.Length = Str.size();
1254 Res->StartLoc = Loc;
1259 static AMDGPUOperand::Ptr CreateReg(
const AMDGPUAsmParser *AsmParser,
1260 MCRegister
Reg, SMLoc S, SMLoc
E) {
1261 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1262 Op->Reg.RegNo =
Reg;
1263 Op->Reg.Mods = Modifiers();
1269 static AMDGPUOperand::Ptr CreateExpr(
const AMDGPUAsmParser *AsmParser,
1270 const class MCExpr *Expr, SMLoc S) {
1271 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1280 OS <<
"abs:" << Mods.Abs <<
" neg: " << Mods.Neg <<
" sext:" << Mods.Sext;
1289#define GET_REGISTER_MATCHER
1290#include "AMDGPUGenAsmMatcher.inc"
1291#undef GET_REGISTER_MATCHER
1292#undef GET_SUBTARGET_FEATURE_NAME
1297class KernelScopeInfo {
1298 int SgprIndexUnusedMin = -1;
1299 int VgprIndexUnusedMin = -1;
1300 int AgprIndexUnusedMin = -1;
1304 void usesSgprAt(
int i) {
1305 if (i >= SgprIndexUnusedMin) {
1306 SgprIndexUnusedMin = ++i;
1309 Ctx->getOrCreateSymbol(
Twine(
".kernel.sgpr_count"));
1315 void usesVgprAt(
int i) {
1316 if (i >= VgprIndexUnusedMin) {
1317 VgprIndexUnusedMin = ++i;
1320 Ctx->getOrCreateSymbol(
Twine(
".kernel.vgpr_count"));
1322 VgprIndexUnusedMin);
1328 void usesAgprAt(
int i) {
1333 if (i >= AgprIndexUnusedMin) {
1334 AgprIndexUnusedMin = ++i;
1337 Ctx->getOrCreateSymbol(
Twine(
".kernel.agpr_count"));
1342 Ctx->getOrCreateSymbol(
Twine(
".kernel.vgpr_count"));
1344 VgprIndexUnusedMin);
/// Defaulted constructor; the Sgpr/Vgpr/AgprIndexUnusedMin members take their
/// in-class initialisers (-1).
1351 KernelScopeInfo() =
default;
1355 MSTI = Ctx->getSubtargetInfo();
1357 usesSgprAt(SgprIndexUnusedMin = -1);
1358 usesVgprAt(VgprIndexUnusedMin = -1);
1360 usesAgprAt(AgprIndexUnusedMin = -1);
1364 void usesRegister(RegisterKind RegKind,
unsigned DwordRegIndex,
1365 unsigned RegWidth) {
1368 usesSgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1371 usesAgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1374 usesVgprAt(DwordRegIndex +
divideCeil(RegWidth, 32) - 1);
1383 MCAsmParser &Parser;
1385 unsigned ForcedEncodingSize = 0;
1386 bool ForcedDPP =
false;
1387 bool ForcedSDWA =
false;
1388 KernelScopeInfo KernelScope;
1389 const unsigned HwMode;
1394#define GET_ASSEMBLER_HEADER
1395#include "AMDGPUGenAsmMatcher.inc"
1400 unsigned getRegOperandSize(
const MCInstrDesc &
Desc,
unsigned OpNo)
const {
1402 int16_t RCID = MII.getOpRegClassID(
Desc.operands()[OpNo], HwMode);
1406 std::optional<AMDGPU::InfoSectionData> InfoData;
1409 void createConstantSymbol(StringRef Id, int64_t Val);
1411 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1412 bool OutOfRangeError(SMRange
Range);
1428 bool calculateGPRBlocks(
const FeatureBitset &Features,
const MCExpr *VCCUsed,
1429 const MCExpr *FlatScrUsed,
bool XNACKUsed,
1430 std::optional<bool> EnableWavefrontSize32,
1431 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1432 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1433 const MCExpr *&VGPRBlocks,
const MCExpr *&SGPRBlocks);
1434 bool ParseDirectiveAMDGCNTarget();
1435 bool ParseDirectiveAMDHSACodeObjectVersion();
1436 bool ParseDirectiveAMDHSAKernel();
1437 bool ParseAMDKernelCodeTValue(StringRef
ID, AMDGPUMCKernelCodeT &Header);
1438 bool ParseDirectiveAMDKernelCodeT();
1440 bool subtargetHasRegister(
const MCRegisterInfo &MRI, MCRegister
Reg);
1441 bool ParseDirectiveAMDGPUHsaKernel();
1443 bool ParseDirectiveISAVersion();
1444 bool ParseDirectiveHSAMetadata();
1445 bool ParseDirectivePALMetadataBegin();
1446 bool ParseDirectivePALMetadata();
1447 bool ParseDirectiveAMDGPULDS();
1448 bool ParseDirectiveAMDGPUInfo();
1452 bool ParseToEndDirective(
const char *AssemblerDirectiveBegin,
1453 const char *AssemblerDirectiveEnd,
1454 std::string &CollectString);
1456 bool AddNextRegisterToList(MCRegister &
Reg,
unsigned &RegWidth,
1457 RegisterKind RegKind, MCRegister Reg1,
1458 RegisterKind RegKind1, SMLoc Loc);
1459 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &
Reg,
1460 unsigned &RegNum,
unsigned &RegWidth,
1461 bool RestoreOnFailure =
false);
1462 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &
Reg,
1463 unsigned &RegNum,
unsigned &RegWidth,
1464 SmallVectorImpl<AsmToken> &Tokens);
1465 MCRegister ParseRegularReg(RegisterKind &RegKind,
unsigned &RegNum,
1467 SmallVectorImpl<AsmToken> &Tokens);
1468 MCRegister ParseSpecialReg(RegisterKind &RegKind,
unsigned &RegNum,
1470 SmallVectorImpl<AsmToken> &Tokens);
1471 MCRegister ParseRegList(RegisterKind &RegKind,
unsigned &RegNum,
1473 SmallVectorImpl<AsmToken> &Tokens);
1474 bool ParseRegRange(
unsigned &Num,
unsigned &Width,
unsigned &SubReg);
1475 MCRegister getRegularReg(RegisterKind RegKind,
unsigned RegNum,
1476 unsigned SubReg,
unsigned RegWidth, SMLoc Loc);
1479 bool isRegister(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1480 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1481 void initializeGprCountSymbol(RegisterKind RegKind);
1482 bool updateGprCountSymbols(RegisterKind RegKind,
unsigned DwordRegIndex,
1484 void cvtMubufImpl(MCInst &Inst,
const OperandVector &Operands,
1489 OperandMode_Default,
1493 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
// Constructs the target assembly parser.
//   STI     - subtarget info, forwarded to MCTargetAsmParser and used to
//             look up the register-info hardware mode cached in HwMode.
//   _Parser - generic MC assembly parser, stored by reference in Parser.
//   MII     - instruction info, forwarded to MCTargetAsmParser.
1495 AMDGPUAsmParser(
const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1496 const MCInstrInfo &MII)
1497 : MCTargetAsmParser(STI, MII), Parser(_Parser),
1498 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
// Derive the matcher's available-feature set from the subtarget feature bits.
1501 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
// The ISA version (Major / Minor / Stepping) is published as constant symbols
// under two naming schemes. NOTE(review): the condition that picks between
// the ".amdgcn.gfx_generation_*" and ".option.machine_version_*" names is not
// visible in this excerpt.
1505 createConstantSymbol(
".amdgcn.gfx_generation_number",
ISA.Major);
1506 createConstantSymbol(
".amdgcn.gfx_generation_minor",
ISA.Minor);
1507 createConstantSymbol(
".amdgcn.gfx_generation_stepping",
ISA.Stepping);
1509 createConstantSymbol(
".option.machine_version_major",
ISA.Major);
1510 createConstantSymbol(
".option.machine_version_minor",
ISA.Minor);
1511 createConstantSymbol(
".option.machine_version_stepping",
ISA.Stepping);
// Set up the GPR-count tracking symbols for vector and scalar registers.
1514 initializeGprCountSymbol(IS_VGPR);
1515 initializeGprCountSymbol(IS_SGPR);
// One constant symbol per (Symbol, Code) pair; the loop header supplying the
// pairs is not visible here.
1520 createConstantSymbol(Symbol, Code);
// Fixed single-bit flag constants exposed to assembly source.
1522 createConstantSymbol(
"UC_VERSION_W64_BIT", 0x2000);
1523 createConstantSymbol(
"UC_VERSION_W32_BIT", 0x4000);
1524 createConstantSymbol(
"UC_VERSION_MDP_BIT", 0x8000);
// Subtarget queries. The two wave-size checks index the matcher's
// available-feature set; the other visible checks index the subtarget
// feature bitset returned by getFeatureBits().
1602 bool isWave32()
const {
return getAvailableFeatures()[Feature_isWave32Bit]; }
1604 bool isWave64()
const {
return getAvailableFeatures()[Feature_isWave64Bit]; }
// True when AMDGPU::FeatureInv2PiInlineImm is set.
1606 bool hasInv2PiInlineImm()
const {
1607 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
// True when AMDGPU::Feature64BitLiterals is set.
1610 bool has64BitLiterals()
const {
1611 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
// True when AMDGPU::FeatureFlatInstOffsets is set.
1614 bool hasFlatOffsets()
const {
1615 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
// True when AMDGPU::FeatureTrue16BitInsts is set.
1618 bool hasTrue16Insts()
const {
1619 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
// NOTE(review): the signature that owns the next return statement is not
// visible in this excerpt.
1623 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
// Body not visible in this excerpt.
1626 bool hasSGPR102_SGPR103()
const {
// Answers with isGFX10Plus().
1630 bool hasSGPR104_SGPR105()
const {
return isGFX10Plus(); }
// NOTE(review): the signature that owns the next return statement is not
// visible in this excerpt.
1633 return getFeatureBits()[AMDGPU::FeatureIntClamp];
// True when AMDGPU::FeaturePartialNSAEncoding is set.
1636 bool hasPartialNSAEncoding()
const {
1637 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
// True when AMDGPU::FeatureGloballyAddressableScratch is set.
1640 bool hasGloballyAddressableScratch()
const {
1641 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
// Returns the streamer's target streamer, downcast to AMDGPUTargetStreamer.
// The static_cast is unchecked, so the attached target streamer must really
// be an AMDGPUTargetStreamer.
1654 AMDGPUTargetStreamer &getTargetStreamer() {
1655 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1656 return static_cast<AMDGPUTargetStreamer &
>(TS);
// Casts away const on `this` to reach MCTargetAsmParser::getContext().
// NOTE(review): the enclosing signature is not visible in this excerpt.
1662 return const_cast<AMDGPUAsmParser *
>(
this)->MCTargetAsmParser::getContext();
// Register-info accessor; body not visible in this excerpt.
1665 const MCRegisterInfo *getMRI()
const {
// Instruction-info accessor; body not visible in this excerpt.
1669 const MCInstrInfo *getMII()
const {
// Returns the feature bitset of the current subtarget info.
1675 const FeatureBitset &getFeatureBits()
const {
1676 return getSTI().getFeatureBits();
// Setters for the forced-encoding state (ForcedEncodingSize, ForcedDPP,
// ForcedSDWA). Each one simply stores its argument.
1679 void setForcedEncodingSize(
unsigned Size) { ForcedEncodingSize =
Size; }
1680 void setForcedDPP(
bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1681 void setForcedSDWA(
bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
// Matching getters.
1683 unsigned getForcedEncodingSize()
const {
return ForcedEncodingSize; }
// VOP3 counts as forced exactly when the forced encoding size equals 64.
1684 bool isForcedVOP3()
const {
return ForcedEncodingSize == 64; }
1685 bool isForcedDPP()
const {
return ForcedDPP; }
1686 bool isForcedSDWA()
const {
return ForcedSDWA; }
1687 ArrayRef<unsigned> getMatchedVariants()
const;
1688 StringRef getMatchedVariantName()
const;
1690 std::unique_ptr<AMDGPUOperand> parseRegister(
bool RestoreOnFailure =
false);
1691 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1692 bool RestoreOnFailure);
1693 bool parseRegister(MCRegister &
Reg, SMLoc &StartLoc, SMLoc &EndLoc)
override;
1694 ParseStatus tryParseRegister(MCRegister &
Reg, SMLoc &StartLoc,
1695 SMLoc &EndLoc)
override;
1696 unsigned checkTargetMatchPredicate(MCInst &Inst)
override;
1697 unsigned validateTargetOperandClass(MCParsedAsmOperand &
Op,
1698 unsigned Kind)
override;
1699 bool matchAndEmitInstruction(SMLoc IDLoc,
unsigned &Opcode,
1701 uint64_t &ErrorInfo,
1702 bool MatchingInlineAsm)
override;
1703 bool ParseDirective(AsmToken DirectiveID)
override;
1704 void onEndOfFile()
override;
1705 ParseStatus parseOperand(
OperandVector &Operands, StringRef Mnemonic,
1706 OperandMode
Mode = OperandMode_Default);
1707 StringRef parseMnemonicSuffix(StringRef Name);
1708 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1712 ParseStatus parseTokenOp(StringRef Name,
OperandVector &Operands);
1714 ParseStatus parseIntWithPrefix(
const char *Prefix, int64_t &
Int);
1717 parseIntWithPrefix(
const char *Prefix,
OperandVector &Operands,
1718 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1719 std::function<
bool(int64_t &)> ConvertResult =
nullptr);
1721 ParseStatus parseOperandArrayWithPrefix(
1723 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1724 bool (*ConvertResult)(int64_t &) =
nullptr);
1728 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1729 bool IgnoreNegative =
false);
1730 unsigned getCPolKind(StringRef Id, StringRef Mnemo,
bool &Disabling)
const;
1732 ParseStatus parseScope(
OperandVector &Operands, int64_t &Scope);
1734 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &
Value,
1736 ParseStatus parseStringOrIntWithPrefix(
OperandVector &Operands,
1738 ArrayRef<const char *> Ids,
1740 ParseStatus parseStringOrIntWithPrefix(
OperandVector &Operands,
1742 ArrayRef<const char *> Ids,
1743 AMDGPUOperand::ImmTy
Type);
1746 bool isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1747 bool isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1748 bool isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1749 bool isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const;
1750 bool parseSP3NegModifier();
1751 ParseStatus parseImm(
OperandVector &Operands,
bool HasSP3AbsModifier =
false,
1754 ParseStatus parseRegOrImm(
OperandVector &Operands,
bool HasSP3AbsMod =
false,
1756 ParseStatus parseRegOrImmWithFPInputMods(
OperandVector &Operands,
1757 bool AllowImm =
true);
1758 ParseStatus parseRegOrImmWithIntInputMods(
OperandVector &Operands,
1759 bool AllowImm =
true);
1760 ParseStatus parseRegWithFPInputMods(
OperandVector &Operands);
1761 ParseStatus parseRegWithIntInputMods(
OperandVector &Operands);
1764 AMDGPUOperand::ImmTy ImmTy);
1768 ParseStatus tryParseMatrixFMT(
OperandVector &Operands, StringRef Name,
1769 AMDGPUOperand::ImmTy
Type);
1772 ParseStatus tryParseMatrixScale(
OperandVector &Operands, StringRef Name,
1773 AMDGPUOperand::ImmTy
Type);
1776 ParseStatus tryParseMatrixScaleFmt(
OperandVector &Operands, StringRef Name,
1777 AMDGPUOperand::ImmTy
Type);
1781 ParseStatus parseDfmtNfmt(int64_t &
Format);
1782 ParseStatus parseUfmt(int64_t &
Format);
1783 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1785 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1788 ParseStatus parseSymbolicOrNumericFormat(int64_t &
Format);
1789 ParseStatus parseNumericFormat(int64_t &
Format);
1793 bool tryParseFmt(
const char *Pref, int64_t MaxVal, int64_t &Val);
1794 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1798 bool parseCnt(int64_t &IntVal);
1801 bool parseDepCtr(int64_t &IntVal,
unsigned &Mask);
1802 void depCtrError(SMLoc Loc,
int ErrorId, StringRef DepCtrName);
1805 bool parseDelay(int64_t &Delay);
// One parsed operand component: an integer value plus two status flags.
// Both flags start out false; the constructor only stores the value.
// NOTE(review): the single-argument constructor is not `explicit`, so an
// int64_t converts implicitly to OperandInfoTy - confirm this is relied upon.
1811 struct OperandInfoTy {
1814 bool IsSymbolic =
false;
1815 bool IsDefined =
false;
1817 constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
// A named field of a structured operand. It extends OperandInfoTy with an
// identifier (Id), a description used in diagnostics (Desc) and a bit width
// (Width); the base-class value is initialised from Default.
1820 struct StructuredOpField : OperandInfoTy {
// NOTE(review): this member hides OperandInfoTy::IsDefined, so code holding
// the object through the base type sees a different flag - confirm intended.
1824 bool IsDefined =
false;
1826 constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1827 unsigned Width, int64_t
Default)
1828 : OperandInfoTy(
Default), Id(Id), Desc(Desc), Width(Width) {}
// Virtual destructor: the type is meant to be subclassed (validate() below is
// virtual).
1829 virtual ~StructuredOpField() =
default;
// Reports "invalid <Desc>: <Err>" at this field's location through the
// parser. The value returned afterwards is not visible in this excerpt.
1831 bool Error(AMDGPUAsmParser &Parser,
const Twine &Err)
const {
1832 Parser.Error(Loc,
"invalid " + Desc +
": " + Err);
// Default validation. Two failure paths are visible, each returning the
// result of Error(): one for an unsupported field and one for a value that
// does not fit the field width. The guarding conditions are not visible here.
1836 virtual bool validate(AMDGPUAsmParser &Parser)
const {
1838 return Error(Parser,
"not supported on this GPU");
1840 return Error(Parser,
"only " + Twine(Width) +
"-bit values are legal");
1848 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &
Op, OperandInfoTy &Stream);
1849 bool validateSendMsg(
const OperandInfoTy &Msg,
1850 const OperandInfoTy &
Op,
1851 const OperandInfoTy &Stream);
1853 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &
Offset,
1854 OperandInfoTy &Width);
1856 const AMDGPUOperand &findMCOperand(
const OperandVector &Operands,
1859 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1861 SMLoc getFlatOffsetLoc(
const OperandVector &Operands)
const;
1862 SMLoc getSMEMOffsetLoc(
const OperandVector &Operands)
const;
1865 SMLoc getOperandLoc(
const OperandVector &Operands,
int MCOpIdx)
const;
1866 SMLoc getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
1868 SMLoc getImmLoc(AMDGPUOperand::ImmTy
Type,
1872 bool validateInstruction(
const MCInst &Inst, SMLoc IDLoc,
1874 bool validateOffset(
const MCInst &Inst,
const OperandVector &Operands);
1875 bool validateFlatOffset(
const MCInst &Inst,
const OperandVector &Operands);
1876 bool validateSMEMOffset(
const MCInst &Inst,
const OperandVector &Operands);
1877 bool validateSOPLiteral(
const MCInst &Inst,
const OperandVector &Operands);
1878 bool validateConstantBusLimitations(
const MCInst &Inst,
const OperandVector &Operands);
1879 std::optional<unsigned> checkVOPDRegBankConstraints(
const MCInst &Inst,
1881 bool validateVOPD(
const MCInst &Inst,
const OperandVector &Operands);
1882 bool tryVOPD(
const MCInst &Inst);
1883 bool tryVOPD3(
const MCInst &Inst);
1884 bool tryAnotherVOPDEncoding(
const MCInst &Inst);
1886 bool validateIntClampSupported(
const MCInst &Inst);
1887 bool validateMIMGAtomicDMask(
const MCInst &Inst);
1888 bool validateMIMGGatherDMask(
const MCInst &Inst);
1889 bool validateMovrels(
const MCInst &Inst,
const OperandVector &Operands);
1890 bool validateMIMGDataSize(
const MCInst &Inst, SMLoc IDLoc);
1891 bool validateMIMGAddrSize(
const MCInst &Inst, SMLoc IDLoc);
1892 bool validateMIMGD16(
const MCInst &Inst);
1893 bool validateMIMGDim(
const MCInst &Inst,
const OperandVector &Operands);
1894 bool validateTensorR128(
const MCInst &Inst);
1895 bool validateMIMGMSAA(
const MCInst &Inst);
1896 bool validateOpSel(
const MCInst &Inst);
1897 bool validateTrue16OpSel(
const MCInst &Inst);
1898 bool validateNeg(
const MCInst &Inst, AMDGPU::OpName OpName);
1899 bool validateDPP(
const MCInst &Inst,
const OperandVector &Operands);
1900 bool validateVccOperand(MCRegister
Reg)
const;
1901 bool validateVOPLiteral(
const MCInst &Inst,
const OperandVector &Operands);
1902 bool validateMAIAccWrite(
const MCInst &Inst,
const OperandVector &Operands);
1903 bool validateMAISrc2(
const MCInst &Inst,
const OperandVector &Operands);
1904 bool validateMFMA(
const MCInst &Inst,
const OperandVector &Operands);
1905 bool validateAGPRLdSt(
const MCInst &Inst)
const;
1906 bool validateVGPRAlign(
const MCInst &Inst)
const;
1907 bool validateBLGP(
const MCInst &Inst,
const OperandVector &Operands);
1908 bool validateDS(
const MCInst &Inst,
const OperandVector &Operands);
1909 bool validateGWS(
const MCInst &Inst,
const OperandVector &Operands);
1910 bool validateDivScale(
const MCInst &Inst);
1911 bool validateWaitCnt(
const MCInst &Inst,
const OperandVector &Operands);
1912 bool validateCoherencyBits(
const MCInst &Inst,
const OperandVector &Operands,
1914 bool validateTHAndScopeBits(
const MCInst &Inst,
const OperandVector &Operands,
1915 const unsigned CPol);
1916 bool validateTFE(
const MCInst &Inst,
const OperandVector &Operands);
1917 bool validateLdsDirect(
const MCInst &Inst,
const OperandVector &Operands);
1918 bool validateWMMA(
const MCInst &Inst,
const OperandVector &Operands);
1919 unsigned getConstantBusLimit(
unsigned Opcode)
const;
1920 bool usesConstantBus(
const MCInst &Inst,
unsigned OpIdx);
1921 bool isInlineConstant(
const MCInst &Inst,
unsigned OpIdx)
const;
1922 MCRegister findImplicitSGPRReadInVOP(
const MCInst &Inst)
const;
1924 bool isSupportedMnemo(StringRef Mnemo,
1925 const FeatureBitset &FBS);
1926 bool isSupportedMnemo(StringRef Mnemo,
1927 const FeatureBitset &FBS,
1928 ArrayRef<unsigned> Variants);
1929 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1931 bool isId(
const StringRef Id)
const;
1932 bool isId(
const AsmToken &Token,
const StringRef Id)
const;
1934 StringRef getId()
const;
1935 bool trySkipId(
const StringRef Id);
1936 bool trySkipId(
const StringRef Pref,
const StringRef Id);
1940 bool parseString(StringRef &Val,
const StringRef ErrMsg =
"expected a string");
1941 bool parseId(StringRef &Val,
const StringRef ErrMsg =
"");
1947 StringRef getTokenStr()
const;
1948 AsmToken peekToken(
bool ShouldSkipSpace =
true);
1950 SMLoc getLoc()
const;
1954 void onBeginOfFile()
override;
1955 bool parsePrimaryExpr(
const MCExpr *&Res, SMLoc &EndLoc)
override;
1957 ParseStatus parseCustomOperand(
OperandVector &Operands,
unsigned MCK);
1967 bool parseSwizzleOperand(int64_t &
Op,
const unsigned MinVal,
1968 const unsigned MaxVal,
const Twine &ErrMsg,
1970 bool parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
1971 const unsigned MinVal,
1972 const unsigned MaxVal,
1973 const StringRef ErrMsg);
1975 bool parseSwizzleOffset(int64_t &
Imm);
1976 bool parseSwizzleMacro(int64_t &
Imm);
1977 bool parseSwizzleQuadPerm(int64_t &
Imm);
1978 bool parseSwizzleBitmaskPerm(int64_t &
Imm);
1979 bool parseSwizzleBroadcast(int64_t &
Imm);
1980 bool parseSwizzleSwap(int64_t &
Imm);
1981 bool parseSwizzleReverse(int64_t &
Imm);
1982 bool parseSwizzleFFT(int64_t &
Imm);
1983 bool parseSwizzleRotate(int64_t &
Imm);
1986 int64_t parseGPRIdxMacro();
// Thin wrappers over cvtMubufImpl that differ only in the boolean third
// argument: false for cvtMubuf, true for cvtMubufAtomic.
// NOTE(review): presumably that flag marks the atomic form; the parameter
// name is not visible in this excerpt.
1988 void cvtMubuf(MCInst &Inst,
const OperandVector &Operands) { cvtMubufImpl(Inst, Operands,
false); }
1989 void cvtMubufAtomic(MCInst &Inst,
const OperandVector &Operands) { cvtMubufImpl(Inst, Operands,
true); }
1994 OptionalImmIndexMap &OptionalIdx);
1995 void cvtScaledMFMA(MCInst &Inst,
const OperandVector &Operands);
1996 void cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands);
1999 void cvtSWMMAC(MCInst &Inst,
const OperandVector &Operands);
2002 void cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands,
2003 OptionalImmIndexMap &OptionalIdx);
2005 OptionalImmIndexMap &OptionalIdx);
2007 void cvtVOP3Interp(MCInst &Inst,
const OperandVector &Operands);
2008 void cvtVINTERP(MCInst &Inst,
const OperandVector &Operands);
2009 void cvtOpSelHelper(MCInst &Inst,
unsigned OpSel);
2011 bool parseDimId(
unsigned &Encoding);
2013 bool convertDppBoundCtrl(int64_t &BoundCtrl);
2016 bool isSupportedDPPCtrl(StringRef Ctrl,
const OperandVector &Operands);
2017 int64_t parseDPPCtrlSel(StringRef Ctrl);
2018 int64_t parseDPPCtrlPerm();
// DPP conversion entry point; IsDPP8 defaults to false.
2019 void cvtDPP(MCInst &Inst,
const OperandVector &Operands,
bool IsDPP8 =
false);
// Forwards to cvtDPP with IsDPP8 = true.
// NOTE(review): the signature of this wrapper is not visible in this excerpt.
2021 cvtDPP(Inst, Operands,
true);
// VOP3 DPP conversion entry point; IsDPP8 defaults to false.
2023 void cvtVOP3DPP(MCInst &Inst,
const OperandVector &Operands,
2024 bool IsDPP8 =
false);
// DPP8 flavour: forwards to cvtVOP3DPP with IsDPP8 = true.
2025 void cvtVOP3DPP8(MCInst &Inst,
const OperandVector &Operands) {
2026 cvtVOP3DPP(Inst, Operands,
true);
2029 ParseStatus parseSDWASel(
OperandVector &Operands, StringRef Prefix,
2030 AMDGPUOperand::ImmTy
Type);
2032 void cvtSdwaVOP1(MCInst &Inst,
const OperandVector &Operands);
2033 void cvtSdwaVOP2(MCInst &Inst,
const OperandVector &Operands);
2034 void cvtSdwaVOP2b(MCInst &Inst,
const OperandVector &Operands);
2035 void cvtSdwaVOP2e(MCInst &Inst,
const OperandVector &Operands);
2036 void cvtSdwaVOPC(MCInst &Inst,
const OperandVector &Operands);
2038 enum class SDWAInstType :
unsigned {
VOP1 = 0,
VOP2 = 1,
VOPC = 2 };
2041 SDWAInstType BasicInstType,
bool SkipDstVcc =
false,
2042 bool SkipSrcVcc =
false);
// Decides whether this immediate operand can be encoded as an inline constant
// for an operand of machine value type `type`.
// Only part of the body is visible in this excerpt; the notes below describe
// the visible lines only.
2153bool AMDGPUOperand::isInlinableImm(
MVT type)
const {
// Early checks on the immediate kind and on an explicit literal modifier.
// Their outcomes are not visible here.
2163 if (!isImmTy(ImmTyNone)) {
2168 if (getModifiers().
Lit != LitModifier::None)
// 64-bit types (f64 / i64) take their own path. Every inline-constant query
// visible below passes the subtarget's hasInv2PiInlineImm() answer.
2178 if (type == MVT::f64 || type == MVT::i64) {
2180 AsmParser->hasInv2PiInlineImm());
// The stored value is reinterpreted as an IEEE double bit pattern.
2183 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64,
Imm.Val));
// Conversion rounds to nearest, ties to even, and reports loss through Lost.
2202 APFloat::rmNearestTiesToEven, &Lost);
2209 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2211 AsmParser->hasInv2PiInlineImm());
2216 static_cast<int32_t
>(FPLiteral.bitcastToAPInt().getZExtValue()),
2217 AsmParser->hasInv2PiInlineImm());
// A second 64-bit branch. NOTE(review): presumably this one serves the
// non-floating-point token case - the enclosing condition is not visible.
2221 if (type == MVT::f64 || type == MVT::i64) {
2223 AsmParser->hasInv2PiInlineImm());
// Narrower checks look only at the low 16 or low 32 bits of the literal.
2232 static_cast<int16_t
>(
Literal.getLoBits(16).getSExtValue()),
2233 type, AsmParser->hasInv2PiInlineImm());
2237 static_cast<int32_t
>(
Literal.getLoBits(32).getZExtValue()),
2238 AsmParser->hasInv2PiInlineImm());
// Decides whether this immediate operand can be encoded as a literal for an
// operand of machine value type `type`.
// Only part of the body is visible in this excerpt.
2241bool AMDGPUOperand::isLiteralImm(MVT type)
const {
2243 if (!isImmTy(ImmTyNone)) {
// 64-bit literal handling needs both a 64-bit type and subtarget support.
2248 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
// f64 combined with FP modifiers is special-cased; the outcome is not visible.
2253 if (type == MVT::f64 && hasFPModifiers()) {
2273 if (type == MVT::f64) {
2278 if (type == MVT::i64) {
// Packed vector types are checked against a scalar type: v2f16 -> f16,
// v2i16 -> f32, v2f32 -> f32. The fallback arm is not visible here.
2291 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2292 : (type == MVT::v2i16) ? MVT::f32
2293 : (type == MVT::v2f32) ? MVT::f32
// The stored value is reinterpreted as an IEEE double bit pattern.
2296 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64,
Imm.Val));
// True when this operand is a register and that register is a member of the
// register class identified by RCID, looked up through the parser's
// MCRegisterInfo. isRegKind() is tested first, so getReg() is only evaluated
// for register operands.
2300bool AMDGPUOperand::isRegClass(
unsigned RCID)
const {
2301 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(
getReg());
// True for a 32-bit VGPR. A 64-bit VGPR tuple (VReg_64) is accepted only when
// the subtarget has FeatureDPALU_DPP.
// NOTE(review): one line of this expression is not visible in this excerpt
// and may affect the VReg_64 alternative.
2304bool AMDGPUOperand::isVRegWithInputMods()
const {
2305 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2307 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2308 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
// Register-class check for the Lo128 VGPR subset. The compile-time IsFake16
// flag selects the class: VGPR_32_Lo128 when true, VGPR_16_Lo128 otherwise.
2311template <
bool IsFake16>
2312bool AMDGPUOperand::isT16_Lo128VRegWithInputMods()
const {
2313 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2314 : AMDGPU::VGPR_16_Lo128RegClassID);
// Register-class check over the full VGPR range. The compile-time IsFake16
// flag selects the class: VGPR_32 when true, VGPR_16 otherwise.
2317template <
bool IsFake16>
bool AMDGPUOperand::isT16VRegWithInputMods()
const {
2318 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2319 : AMDGPU::VGPR_16RegClassID);
// Checks whether this operand is acceptable as an SDWA source of machine
// value type `type`. The answer depends on the hardware generation.
2322bool AMDGPUOperand::isSDWAOperand(MVT type)
const {
// VI path; the value returned is not visible in this excerpt.
2323 if (AsmParser->isVI())
// GFX9+: a VS_32 register or an inlinable immediate of the requested type.
2325 if (AsmParser->isGFX9Plus())
2326 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
// Typed convenience wrappers around isSDWAOperand(). Each one fixes the
// machine value type: f16, f32, i16 and i32 respectively.
2330bool AMDGPUOperand::isSDWAFP16Operand()
const {
2331 return isSDWAOperand(MVT::f16);
2334bool AMDGPUOperand::isSDWAFP32Operand()
const {
2335 return isSDWAOperand(MVT::f32);
2338bool AMDGPUOperand::isSDWAInt16Operand()
const {
2339 return isSDWAOperand(MVT::i16);
2342bool AMDGPUOperand::isSDWAInt32Operand()
const {
2343 return isSDWAOperand(MVT::i32);
// True when the operand is a plain register (isReg(), i.e. no modifiers) whose
// width matches the wavefront size: isSCSrc_b64() in wave64 mode,
// isSCSrc_b32() in wave32 mode.
2346bool AMDGPUOperand::isBoolReg()
const {
2347 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2348 (AsmParser->isWave32() && isSCSrc_b32()));
// Applies this operand's floating-point input modifiers to the raw bit
// pattern Val and returns the adjusted pattern.
// Size is used as a byte count: the sign bit is taken to be bit Size * 8 - 1.
// The statements that use the mask are not visible in this excerpt.
2351uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val,
unsigned Size)
const
// Precondition: a plain immediate that actually carries FP modifiers.
2353 assert(isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
// NOTE(review): the shift is only defined for Size in [1, 8]; presumably
// callers pass 2, 4 or 8 - confirm.
2356 const uint64_t FpSignMask = (1ULL << (
Size * 8 - 1));
// Appends this immediate operand to Inst.
// Only part of the body is visible in this excerpt.
2368void AMDGPUOperand::addImmOperands(MCInst &Inst,
unsigned N,
bool ApplyModifiers)
const {
// Literal path: hands the raw value to addLiteralImmOperand. The modifier
// flag passed along is true only for a plain immediate carrying FP modifiers;
// the rest of that argument expression is not visible here.
2378 addLiteralImmOperand(Inst,
Imm.Val,
2380 isImmTy(ImmTyNone) &&
Imm.Mods.hasFPModifiers());
// On the other path a plain immediate must not carry any modifiers.
2382 assert(!isImmTy(ImmTyNone) || !hasModifiers());
// Encodes the immediate Val as an operand of Inst, choosing between inline
// constant, 32-bit literal and 64-bit literal forms.
//   Inst           - instruction being built; its opcode selects the
//                    descriptor used to find the operand type.
//   Val            - raw immediate value.
//   ApplyModifiers - when true, FP input modifiers are folded into Val first.
// Only part of the body is visible in this excerpt; the notes below describe
// the visible lines only.
2387void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
bool ApplyModifiers)
const {
2388 const auto& InstDesc = AsmParser->getMII()->get(Inst.
getOpcode());
2393 if (ApplyModifiers) {
2396 Val = applyInputFPModifiers(Val,
Size);
2400 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
// 64-bit literals need subtarget support plus a further condition that is not
// visible here.
2402 bool CanUse64BitLiterals =
2403 AsmParser->has64BitLiterals() &&
2406 MCContext &Ctx = AsmParser->getContext();
// Inline constants are considered only when no literal modifier was given.
2417 if (
Lit == LitModifier::None &&
2419 AsmParser->hasInv2PiInlineImm())) {
2427 bool HasMandatoryLiteral =
// A double whose low 32 bits are non-zero cannot be represented exactly in
// this case: warn, then clear the low 32 bits of Val.
2430 if (
Literal.getLoBits(32) != 0 &&
2431 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2432 !HasMandatoryLiteral) {
// AsmParser is a pointer-to-const, so const is cast away to emit the warning.
2433 const_cast<AMDGPUAsmParser *
>(AsmParser)->
Warning(
2435 "Can't encode literal as exact 64-bit floating-point operand. "
2436 "Low 32-bits will be set to zero");
2437 Val &= 0xffffffff00000000u;
// With no explicit modifier, 64-bit encoding may be selected automatically.
2443 if (CanUse64BitLiterals &&
Lit == LitModifier::None &&
2449 Lit = LitModifier::Lit64;
2450 }
else if (
Lit == LitModifier::Lit) {
2464 if (CanUse64BitLiterals &&
Lit == LitModifier::None &&
2466 Lit = LitModifier::Lit64;
// NOTE(review): magic bit pattern. The hasInv2PiInlineImm() guard suggests it
// is a specific encoding of 1/(2*pi) - confirm which format before relying on
// this.
2473 if (
Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2474 Literal == 0x3fc45f306725feed) {
// Conversion rounds to nearest, ties to even, and reports loss through `lost`.
2509 APFloat::rmNearestTiesToEven, &lost);
2513 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2520 if (
Lit != LitModifier::None) {
2551 if (
Lit == LitModifier::None &&
2561 if (!AsmParser->has64BitLiterals() ||
Lit == LitModifier::Lit)
2569 if (
Lit == LitModifier::None &&
// Without 64-bit literal support the value is moved into the upper 32 bits.
2577 if (!AsmParser->has64BitLiterals()) {
2578 Val =
static_cast<uint64_t
>(Val) << 32;
// An explicit 32-bit literal modifier (or a second condition that is not
// visible here) likewise moves the value into the upper 32 bits.
2585 if (
Lit == LitModifier::Lit ||
2587 Val =
static_cast<uint64_t
>(Val) << 32;
2591 if (
Lit == LitModifier::Lit)
2617 if (
Lit != LitModifier::None) {
2625void AMDGPUOperand::addRegOperands(MCInst &Inst,
unsigned N)
const {
2630bool AMDGPUOperand::isInlineValue()
const {
2638void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2649 if (Is == IS_VGPR) {
2653 return AMDGPU::VGPR_32RegClassID;
2655 return AMDGPU::VReg_64RegClassID;
2657 return AMDGPU::VReg_96RegClassID;
2659 return AMDGPU::VReg_128RegClassID;
2661 return AMDGPU::VReg_160RegClassID;
2663 return AMDGPU::VReg_192RegClassID;
2665 return AMDGPU::VReg_224RegClassID;
2667 return AMDGPU::VReg_256RegClassID;
2669 return AMDGPU::VReg_288RegClassID;
2671 return AMDGPU::VReg_320RegClassID;
2673 return AMDGPU::VReg_352RegClassID;
2675 return AMDGPU::VReg_384RegClassID;
2677 return AMDGPU::VReg_512RegClassID;
2679 return AMDGPU::VReg_1024RegClassID;
2681 }
else if (Is == IS_TTMP) {
2685 return AMDGPU::TTMP_32RegClassID;
2687 return AMDGPU::TTMP_64RegClassID;
2689 return AMDGPU::TTMP_128RegClassID;
2691 return AMDGPU::TTMP_256RegClassID;
2693 return AMDGPU::TTMP_512RegClassID;
2695 }
else if (Is == IS_SGPR) {
2699 return AMDGPU::SGPR_32RegClassID;
2701 return AMDGPU::SGPR_64RegClassID;
2703 return AMDGPU::SGPR_96RegClassID;
2705 return AMDGPU::SGPR_128RegClassID;
2707 return AMDGPU::SGPR_160RegClassID;
2709 return AMDGPU::SGPR_192RegClassID;
2711 return AMDGPU::SGPR_224RegClassID;
2713 return AMDGPU::SGPR_256RegClassID;
2715 return AMDGPU::SGPR_288RegClassID;
2717 return AMDGPU::SGPR_320RegClassID;
2719 return AMDGPU::SGPR_352RegClassID;
2721 return AMDGPU::SGPR_384RegClassID;
2723 return AMDGPU::SGPR_512RegClassID;
2725 }
else if (Is == IS_AGPR) {
2729 return AMDGPU::AGPR_32RegClassID;
2731 return AMDGPU::AReg_64RegClassID;
2733 return AMDGPU::AReg_96RegClassID;
2735 return AMDGPU::AReg_128RegClassID;
2737 return AMDGPU::AReg_160RegClassID;
2739 return AMDGPU::AReg_192RegClassID;
2741 return AMDGPU::AReg_224RegClassID;
2743 return AMDGPU::AReg_256RegClassID;
2745 return AMDGPU::AReg_288RegClassID;
2747 return AMDGPU::AReg_320RegClassID;
2749 return AMDGPU::AReg_352RegClassID;
2751 return AMDGPU::AReg_384RegClassID;
2753 return AMDGPU::AReg_512RegClassID;
2755 return AMDGPU::AReg_1024RegClassID;
2763 .
Case(
"exec", AMDGPU::EXEC)
2764 .
Case(
"vcc", AMDGPU::VCC)
2765 .
Case(
"flat_scratch", AMDGPU::FLAT_SCR)
2766 .
Case(
"xnack_mask", AMDGPU::XNACK_MASK)
2767 .
Case(
"shared_base", AMDGPU::SRC_SHARED_BASE)
2768 .
Case(
"src_shared_base", AMDGPU::SRC_SHARED_BASE)
2769 .
Case(
"shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2770 .
Case(
"src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2771 .
Case(
"private_base", AMDGPU::SRC_PRIVATE_BASE)
2772 .
Case(
"src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2773 .
Case(
"private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2774 .
Case(
"src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2775 .
Case(
"src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2776 .
Case(
"src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2777 .
Case(
"pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2778 .
Case(
"src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2779 .
Case(
"lds_direct", AMDGPU::LDS_DIRECT)
2780 .
Case(
"src_lds_direct", AMDGPU::LDS_DIRECT)
2781 .
Case(
"m0", AMDGPU::M0)
2782 .
Case(
"vccz", AMDGPU::SRC_VCCZ)
2783 .
Case(
"src_vccz", AMDGPU::SRC_VCCZ)
2784 .
Case(
"execz", AMDGPU::SRC_EXECZ)
2785 .
Case(
"src_execz", AMDGPU::SRC_EXECZ)
2786 .
Case(
"scc", AMDGPU::SRC_SCC)
2787 .
Case(
"src_scc", AMDGPU::SRC_SCC)
2788 .
Case(
"tba", AMDGPU::TBA)
2789 .
Case(
"tma", AMDGPU::TMA)
2790 .
Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2791 .
Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2792 .
Case(
"xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2793 .
Case(
"xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2794 .
Case(
"vcc_lo", AMDGPU::VCC_LO)
2795 .
Case(
"vcc_hi", AMDGPU::VCC_HI)
2796 .
Case(
"exec_lo", AMDGPU::EXEC_LO)
2797 .
Case(
"exec_hi", AMDGPU::EXEC_HI)
2798 .
Case(
"tma_lo", AMDGPU::TMA_LO)
2799 .
Case(
"tma_hi", AMDGPU::TMA_HI)
2800 .
Case(
"tba_lo", AMDGPU::TBA_LO)
2801 .
Case(
"tba_hi", AMDGPU::TBA_HI)
2802 .
Case(
"pc", AMDGPU::PC_REG)
2803 .
Case(
"null", AMDGPU::SGPR_NULL)
// Parses one register operand and reports its register number and source
// range through the out-parameters RegNo, StartLoc and EndLoc.
// Returns true when no register could be parsed.
// NOTE(review): RestoreOnFailure is never referenced in the visible lines;
// parseRegister() is called with its default argument, so the caller's
// request is not forwarded - confirm this is intended.
2807bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2808 SMLoc &EndLoc,
bool RestoreOnFailure) {
2809 auto R = parseRegister();
// A null operand means parsing failed.
2810 if (!R)
return true;
// Copy the parsed register and its location out of the operand.
2812 RegNo =
R->getReg();
2813 StartLoc =
R->getStartLoc();
2814 EndLoc =
R->getEndLoc();
// Register-parsing entry point. Delegates to ParseRegister with
// RestoreOnFailure = false and returns its result unchanged.
2818bool AMDGPUAsmParser::parseRegister(MCRegister &
Reg, SMLoc &StartLoc,
2820 return ParseRegister(
Reg, StartLoc, EndLoc,
false);
// Non-committal register parse. Delegates to ParseRegister with
// RestoreOnFailure = true, then captures and clears any pending parser errors
// so that a failed attempt leaves no queued diagnostics.
// How Result and PendingErrors map onto the returned ParseStatus is not
// visible in this excerpt.
2823ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &
Reg, SMLoc &StartLoc,
2825 bool Result = ParseRegister(
Reg, StartLoc, EndLoc,
true);
// Remember whether an error was queued before discarding it.
2826 bool PendingErrors = getParser().hasPendingError();
2827 getParser().clearPendingErrors();
// Tries to extend the register list accumulated in Reg / RegWidth with the
// next element Reg1 (of kind RegKind1). Problems are reported at Loc.
// Only part of the body is visible in this excerpt.
2835bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &
Reg,
unsigned &RegWidth,
2836 RegisterKind RegKind,
2838 RegisterKind RegKind1, SMLoc Loc) {
// SGPR lists: RegIdx is the index just past the last dword already in the
// list. vcc_lo / vcc_hi are recognised when that index is 106 / 107.
// What the accepting branch does is not visible here.
2840 if (RegKind == IS_SGPR) {
2841 unsigned RegIdx = (
Reg - AMDGPU::SGPR0) + RegWidth / 32;
2842 if ((RegIdx == 106 && Reg1 == AMDGPU::VCC_LO) ||
2843 (RegIdx == 107 && Reg1 == AMDGPU::VCC_HI)) {
2849 if (RegKind != RegKind1) {
2850 Error(Loc,
"registers in a list must be of the same kind");
// NOTE(review): the function is declared to return bool but this path returns
// MCRegister(). It compiles only through an implicit conversion; presumably
// the result is false - confirm and consider spelling it `false`.
2851 return MCRegister();
// Special-register pairs: a *_LO register followed by its *_HI partner.
// Two of the visible branches assign the combined 64-bit register; the bodies
// of the others are not visible here.
2856 if (
Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2861 if (
Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2862 Reg = AMDGPU::FLAT_SCR;
2866 if (
Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2867 Reg = AMDGPU::XNACK_MASK;
2871 if (
Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2876 if (
Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2881 if (
Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2886 Error(Loc,
"register does not fit in the list");
// Regular registers: the next element must directly follow the dwords already
// collected.
2892 if (Reg1 !=
Reg + RegWidth / 32) {
2893 Error(Loc,
"registers in a list must have consecutive indices");
2911 {{
"ttmp"}, IS_TTMP},
2917 return Kind == IS_VGPR ||
2925 if (Str.starts_with(
Reg.Name))
2931 return !Str.getAsInteger(10, Num);
// Lookahead test on a token pair: does Token (followed by NextToken) start a
// register? The visible lines strip a recognised register-name prefix and
// then inspect any remaining suffix. The return type and the remaining logic
// are not visible in this excerpt.
2935AMDGPUAsmParser::isRegister(
const AsmToken &Token,
2936 const AsmToken &NextToken)
const {
// Whatever follows the register-name prefix.
2951 StringRef RegSuffix = Str.substr(
RegName.size());
2952 if (!RegSuffix.
empty()) {
// Convenience overload: runs the two-token test on the current token and the
// token peeked after it.
2970AMDGPUAsmParser::isRegister()
2972 return isRegister(
getToken(), peekToken());
// Maps a (kind, first dword index, sub-register, width) description onto a
// concrete MCRegister. Each visible failure path emits a diagnostic before
// returning.
// Only part of the body is visible in this excerpt.
2975MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
unsigned RegNum,
2976 unsigned SubReg,
unsigned RegWidth,
// Alignment defaults to 1; SGPR and TTMP registers get special treatment
// (the adjusted value is not visible here).
2980 unsigned AlignSize = 1;
2981 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2987 if (RegNum % AlignSize != 0) {
2988 Error(Loc,
"invalid register alignment");
2989 return MCRegister();
// Index of the register within its class, counted in AlignSize units.
2992 unsigned RegIdx = RegNum / AlignSize;
2995 Error(Loc,
"invalid or unsupported register size");
2996 return MCRegister();
// NOTE(review): RC is declared by value, which copies the MCRegisterClass if
// getRegClass returns a reference; a `const MCRegisterClass &` would avoid
// that - confirm the return type.
3000 const MCRegisterClass RC =
TRI->getRegClass(RCID);
// Besides the class size, VGPR indices above 255 are rejected outright.
3001 if (RegIdx >= RC.
getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
3002 Error(Loc,
"register index is out of range");
// NOTE(review): this failure path returns AMDGPU::NoRegister while the other
// visible failure paths return MCRegister() - presumably equivalent; confirm
// and consider unifying.
3003 return AMDGPU::NoRegister;
// Before GFX1250 a VGPR tuple must end at or below index 255
// (RegIdx + dword count <= 256).
3006 if (RegKind == IS_VGPR && !
isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
3007 Error(Loc,
"register index is out of range");
3008 return MCRegister();
// Parses a register index or index range and reports the first index through
// Num and the total width in bits through RegWidth. A 16-bit half suffix is
// reported through SubReg.
// Only part of the body is visible in this excerpt.
3024bool AMDGPUAsmParser::ParseRegRange(
unsigned &Num,
unsigned &RegWidth,
// Both bounds are parsed as 64-bit signed values.
3026 int64_t RegLo, RegHi;
// Token locations are captured so each bound gets its own diagnostic.
3030 SMLoc FirstIdxLoc = getLoc();
3037 SecondIdxLoc = getLoc();
3048 Error(FirstIdxLoc,
"invalid register index");
3053 Error(SecondIdxLoc,
"invalid register index");
// The range must not be descending.
3057 if (RegLo > RegHi) {
3058 Error(FirstIdxLoc,
"first register index should not exceed second index");
// A single-register range may be followed by ".l" or ".h" to select the low
// or high 16-bit half.
3062 if (RegHi == RegLo) {
3063 StringRef RegSuffix = getTokenStr();
3064 if (RegSuffix ==
".l") {
3065 SubReg = AMDGPU::lo16;
3067 }
else if (RegSuffix ==
".h") {
3068 SubReg = AMDGPU::hi16;
// NOTE(review): RegLo is narrowed from int64_t to unsigned here; presumably
// the validation above bounds it - confirm against the lines not shown.
3073 Num =
static_cast<unsigned>(RegLo);
// 32 bits per register in the inclusive range [RegLo, RegHi].
3074 RegWidth = 32 * ((RegHi - RegLo) + 1);
// Parses a special (named) register. The only statement visible in this
// excerpt sets RegKind to IS_SPECIAL; consumed tokens are presumably recorded
// in Tokens - confirm against the full body.
3079MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3082 SmallVectorImpl<AsmToken> &Tokens) {
3088 RegKind = IS_SPECIAL;
// Parses a regular register. The visible lines handle an unrecognised name, a
// name suffix (16-bit half selection or a bad index), a range parsed by
// ParseRegRange, and a final rejection of results that touch VCC.
// Each visible failure path emits a diagnostic and returns an invalid
// MCRegister. Only part of the body is visible in this excerpt.
3095MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3098 SmallVectorImpl<AsmToken> &Tokens) {
3100 StringRef
RegName = getTokenStr();
// All diagnostics below point at the start of the register token.
3101 auto Loc = getLoc();
3105 Error(Loc,
"invalid register name");
3106 return MCRegister();
3114 unsigned SubReg = NoSubRegister;
3115 bool IsRange =
false;
// Anything after the register-name prefix. The visible outcomes are lo16,
// hi16 and an invalid-index error; the tests choosing between them are not
// visible here.
3116 if (!RegSuffix.
empty()) {
3118 SubReg = AMDGPU::lo16;
3120 SubReg = AMDGPU::hi16;
3124 Error(Loc,
"invalid register index");
3125 return MCRegister();
3131 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3132 return MCRegister();
3136 MCRegister
Reg = getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3137 const MCRegisterInfo &
TRI = *
getContext().getRegisterInfo();
// Reject results that alias VCC.
// NOTE(review): `&&` binds tighter than `?:`, so the selector is
// (RegKind == IS_SGPR && IsRange). SGPR ranges use the sub-register test;
// everything else, including non-SGPR kinds, uses the direct comparison.
// Confirm this grouping is intended.
3138 if (RegKind == IS_SGPR && IsRange
3139 ? (
TRI.isSubRegister(
Reg, VCC_LO) ||
TRI.isSubRegister(
Reg, VCC_HI))
3140 : (
Reg == VCC_LO ||
Reg == VCC_HI)) {
3141 Error(Loc,
"register index is out of range");
3142 return MCRegister();
// Parses a bracketed list of single 32-bit registers and combines them into
// one register description (RegKind, RegNum, RegWidth).
// Each visible failure path returns an invalid MCRegister.
// Only part of the body is visible in this excerpt.
3148MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3149 unsigned &RegNum,
unsigned &RegWidth,
3150 SmallVectorImpl<AsmToken> &Tokens) {
// Location of the whole list, used when the combined register is built.
3152 auto ListLoc = getLoc();
3155 "expected a register or a list of registers")) {
3156 return MCRegister();
// First element: must parse and must be exactly 32 bits wide.
// NOTE(review): this call uses the overload without a Tokens argument, so the
// tokens of the first element are presumably not recorded in the caller's
// Tokens vector - confirm.
3161 auto Loc = getLoc();
3162 if (!ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth))
3163 return MCRegister();
3164 if (RegWidth != 32) {
3165 Error(Loc,
"expected a single 32-bit register");
3166 return MCRegister();
// Following elements: each is parsed into Next* temporaries, must be 32 bits
// wide, and is then merged through AddNextRegisterToList.
3170 RegisterKind NextRegKind;
3172 unsigned NextRegNum, NextRegWidth;
3175 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3176 NextRegNum, NextRegWidth,
3178 return MCRegister();
3180 if (NextRegWidth != 32) {
3181 Error(Loc,
"expected a single 32-bit register");
3182 return MCRegister();
3184 if (!AddNextRegisterToList(
Reg, RegWidth, RegKind, NextReg, NextRegKind,
3186 return MCRegister();
3190 "expected a comma or a closing square bracket")) {
3191 return MCRegister();
// Re-materialise the combined register from the accumulated width. Any
// condition guarding this statement is not visible here.
3195 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
// Core register parser. It dispatches to one of three sub-parsers (special
// register, regular register, bracketed list) and then checks that the result
// exists on the current subtarget.
// Only part of the body is visible in this excerpt; the tests selecting the
// sub-parser are not shown.
3200bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3201 MCRegister &
Reg,
unsigned &RegNum,
3203 SmallVectorImpl<AsmToken> &Tokens) {
3204 auto Loc = getLoc();
3208 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3210 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3212 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
// Sub-parsers are expected to have queued a diagnostic on this path.
3217 assert(Parser.hasPendingError());
// Subtarget availability check. 'null' gets a dedicated message; the other
// message is built from a prefix that is not visible here.
3221 if (!subtargetHasRegister(*
TRI,
Reg)) {
3222 if (
Reg == AMDGPU::SGPR_NULL) {
3223 Error(Loc,
"'null' operand is not supported on this GPU");
3226 " register not available on this GPU");
3234bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3235 MCRegister &
Reg,
unsigned &RegNum,
3237 bool RestoreOnFailure ) {
3241 if (ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth, Tokens)) {
3242 if (RestoreOnFailure) {
3243 while (!Tokens.
empty()) {
3252std::optional<StringRef>
3253AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3256 return StringRef(
".amdgcn.next_free_vgpr");
3258 return StringRef(
".amdgcn.next_free_sgpr");
3260 return std::nullopt;
3264void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3265 auto SymbolName = getGprCountSymbolName(RegKind);
3266 assert(SymbolName &&
"initializing invalid register kind");
3272bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3273 unsigned DwordRegIndex,
3274 unsigned RegWidth) {
3279 auto SymbolName = getGprCountSymbolName(RegKind);
3284 int64_t NewMax = DwordRegIndex +
divideCeil(RegWidth, 32) - 1;
3288 return !
Error(getLoc(),
3289 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3293 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3295 if (OldCount <= NewMax)
3301std::unique_ptr<AMDGPUOperand>
3302AMDGPUAsmParser::parseRegister(
bool RestoreOnFailure) {
3304 SMLoc StartLoc = Tok.getLoc();
3305 SMLoc EndLoc = Tok.getEndLoc();
3306 RegisterKind RegKind;
3308 unsigned RegNum, RegWidth;
3310 if (!ParseAMDGPURegister(RegKind,
Reg, RegNum, RegWidth)) {
3314 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3317 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3318 return AMDGPUOperand::CreateReg(
this,
Reg, StartLoc, EndLoc);
3321ParseStatus AMDGPUAsmParser::parseImm(
OperandVector &Operands,
3325 if (isRegister() || isModifier())
3328 if (
Lit == LitModifier::None) {
3329 if (trySkipId(
"lit"))
3330 Lit = LitModifier::Lit;
3331 else if (trySkipId(
"lit64"))
3332 Lit = LitModifier::Lit64;
3334 if (
Lit != LitModifier::None) {
3337 ParseStatus S = parseImm(Operands, HasSP3AbsModifier,
Lit);
3346 const auto& NextTok = peekToken();
3349 bool Negate =
false;
3357 AMDGPUOperand::Modifiers Mods;
3365 StringRef Num = getTokenStr();
3368 APFloat RealVal(APFloat::IEEEdouble());
3369 auto roundMode = APFloat::rmNearestTiesToEven;
3370 if (
errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3373 RealVal.changeSign();
3376 AMDGPUOperand::CreateImm(
this, RealVal.bitcastToAPInt().getZExtValue(), S,
3377 AMDGPUOperand::ImmTyNone,
true));
3378 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3379 Op.setModifiers(Mods);
3388 if (HasSP3AbsModifier) {
3397 if (getParser().parsePrimaryExpr(Expr, EndLoc,
nullptr))
3400 if (Parser.parseExpression(Expr))
3404 if (Expr->evaluateAsAbsolute(IntVal)) {
3406 return Error(S,
"literal value out of range");
3407 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
3408 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3409 Op.setModifiers(Mods);
3411 if (
Lit != LitModifier::None)
3413 Operands.
push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
3422ParseStatus AMDGPUAsmParser::parseReg(
OperandVector &Operands) {
3426 if (
auto R = parseRegister()) {
3434ParseStatus AMDGPUAsmParser::parseRegOrImm(
OperandVector &Operands,
3436 ParseStatus Res = parseReg(Operands);
3441 return parseImm(Operands, HasSP3AbsMod,
Lit);
3445AMDGPUAsmParser::isNamedOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3448 return str ==
"abs" || str ==
"neg" || str ==
"sext";
3454AMDGPUAsmParser::isOpcodeModifierWithVal(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3459AMDGPUAsmParser::isOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3460 return isNamedOperandModifier(Token, NextToken) || Token.
is(
AsmToken::Pipe);
3464AMDGPUAsmParser::isRegOrOperandModifier(
const AsmToken &Token,
const AsmToken &NextToken)
const {
3465 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3482AMDGPUAsmParser::isModifier() {
3485 AsmToken NextToken[2];
3486 peekTokens(NextToken);
3488 return isOperandModifier(Tok, NextToken[0]) ||
3489 (Tok.
is(
AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3490 isOpcodeModifierWithVal(Tok, NextToken[0]);
3516AMDGPUAsmParser::parseSP3NegModifier() {
3518 AsmToken NextToken[2];
3519 peekTokens(NextToken);
3522 (isRegister(NextToken[0], NextToken[1]) ||
3524 isId(NextToken[0],
"abs"))) {
3533AMDGPUAsmParser::parseRegOrImmWithFPInputMods(
OperandVector &Operands,
3541 return Error(getLoc(),
"invalid syntax, expected 'neg' modifier");
3543 SP3Neg = parseSP3NegModifier();
3546 Neg = trySkipId(
"neg");
3548 return Error(Loc,
"expected register or immediate");
3552 Abs = trySkipId(
"abs");
3557 if (trySkipId(
"lit")) {
3558 Lit = LitModifier::Lit;
3561 }
else if (trySkipId(
"lit64")) {
3562 Lit = LitModifier::Lit64;
3565 if (!has64BitLiterals())
3566 return Error(Loc,
"lit64 is not supported on this GPU");
3572 return Error(Loc,
"expected register or immediate");
3576 Res = parseRegOrImm(Operands, SP3Abs,
Lit);
3578 Res = parseReg(Operands);
3581 return (SP3Neg || Neg || SP3Abs || Abs ||
Lit != LitModifier::None)
3585 if (
Lit != LitModifier::None && !Operands.
back()->isImm())
3586 Error(Loc,
"expected immediate with lit modifier");
3588 if (SP3Abs && !skipToken(
AsmToken::Pipe,
"expected vertical bar"))
3594 if (
Lit != LitModifier::None &&
3598 AMDGPUOperand::Modifiers Mods;
3599 Mods.Abs = Abs || SP3Abs;
3600 Mods.Neg = Neg || SP3Neg;
3603 if (Mods.hasFPModifiers() ||
Lit != LitModifier::None) {
3604 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3606 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3607 Op.setModifiers(Mods);
3613AMDGPUAsmParser::parseRegOrImmWithIntInputMods(
OperandVector &Operands,
3615 bool Sext = trySkipId(
"sext");
3616 if (Sext && !skipToken(
AsmToken::LParen,
"expected left paren after sext"))
3621 Res = parseRegOrImm(Operands);
3623 Res = parseReg(Operands);
3631 AMDGPUOperand::Modifiers Mods;
3634 if (Mods.hasIntModifiers()) {
3635 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands.
back());
3637 return Error(
Op.getStartLoc(),
"expected an absolute expression");
3638 Op.setModifiers(Mods);
3644ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(
OperandVector &Operands) {
3645 return parseRegOrImmWithFPInputMods(Operands,
false);
3648ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(
OperandVector &Operands) {
3649 return parseRegOrImmWithIntInputMods(Operands,
false);
3652ParseStatus AMDGPUAsmParser::parseVReg32OrOff(
OperandVector &Operands) {
3653 auto Loc = getLoc();
3654 if (trySkipId(
"off")) {
3655 Operands.
push_back(AMDGPUOperand::CreateImm(
this, 0, Loc,
3656 AMDGPUOperand::ImmTyOff,
false));
3663 std::unique_ptr<AMDGPUOperand>
Reg = parseRegister();
3672unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3679 return Match_InvalidOperand;
3681 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3682 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3685 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::dst_sel);
3687 if (!
Op.isImm() ||
Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3688 return Match_InvalidOperand;
3696 if (tryAnotherVOPDEncoding(Inst))
3697 return Match_InvalidOperand;
3699 return Match_Success;
3703 static const unsigned Variants[] = {
3713ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants()
const {
3714 if (isForcedDPP() && isForcedVOP3()) {
3718 if (getForcedEncodingSize() == 32) {
3723 if (isForcedVOP3()) {
3728 if (isForcedSDWA()) {
3734 if (isForcedDPP()) {
3742StringRef AMDGPUAsmParser::getMatchedVariantName()
const {
3743 if (isForcedDPP() && isForcedVOP3())
3746 if (getForcedEncodingSize() == 32)
3762AMDGPUAsmParser::findImplicitSGPRReadInVOP(
const MCInst &Inst)
const {
3766 case AMDGPU::FLAT_SCR:
3768 case AMDGPU::VCC_LO:
3769 case AMDGPU::VCC_HI:
3776 return MCRegister();
3783bool AMDGPUAsmParser::isInlineConstant(
const MCInst &Inst,
3784 unsigned OpIdx)
const {
3841unsigned AMDGPUAsmParser::getConstantBusLimit(
unsigned Opcode)
const {
3847 case AMDGPU::V_LSHLREV_B64_e64:
3848 case AMDGPU::V_LSHLREV_B64_gfx10:
3849 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3850 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3851 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3852 case AMDGPU::V_LSHRREV_B64_e64:
3853 case AMDGPU::V_LSHRREV_B64_gfx10:
3854 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3855 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3856 case AMDGPU::V_ASHRREV_I64_e64:
3857 case AMDGPU::V_ASHRREV_I64_gfx10:
3858 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3859 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3860 case AMDGPU::V_LSHL_B64_e64:
3861 case AMDGPU::V_LSHR_B64_e64:
3862 case AMDGPU::V_ASHR_I64_e64:
3875 bool AddMandatoryLiterals =
false) {
3878 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3882 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3884 return {getNamedOperandIdx(Opcode, OpName::src0X),
3885 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3886 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3887 getNamedOperandIdx(Opcode, OpName::src0Y),
3888 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3889 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3894 return {getNamedOperandIdx(Opcode, OpName::src0),
3895 getNamedOperandIdx(Opcode, OpName::src1),
3896 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3899bool AMDGPUAsmParser::usesConstantBus(
const MCInst &Inst,
unsigned OpIdx) {
3902 return !isInlineConstant(Inst,
OpIdx);
3909 return isSGPR(PReg,
TRI) && PReg != SGPR_NULL;
3920 const unsigned Opcode = Inst.
getOpcode();
3921 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3924 if (!LaneSelOp.
isReg())
3927 return LaneSelReg ==
M0 || LaneSelReg == M0_gfxpre11;
3930bool AMDGPUAsmParser::validateConstantBusLimitations(
3932 const unsigned Opcode = Inst.
getOpcode();
3933 const MCInstrDesc &
Desc = MII.
get(Opcode);
3934 MCRegister LastSGPR;
3935 unsigned ConstantBusUseCount = 0;
3936 unsigned NumLiterals = 0;
3937 unsigned LiteralSize;
3939 if (!(
Desc.TSFlags &
3954 SmallDenseSet<MCRegister> SGPRsUsed;
3955 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3957 SGPRsUsed.
insert(SGPRUsed);
3958 ++ConstantBusUseCount;
3963 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3965 for (
int OpIdx : OpIndices) {
3970 if (usesConstantBus(Inst,
OpIdx)) {
3979 if (SGPRsUsed.
insert(LastSGPR).second) {
3980 ++ConstantBusUseCount;
4000 if (NumLiterals == 0) {
4003 }
else if (LiteralSize !=
Size) {
4009 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
4011 "invalid operand (violates constant bus restrictions)");
4018std::optional<unsigned>
4019AMDGPUAsmParser::checkVOPDRegBankConstraints(
const MCInst &Inst,
bool AsVOPD3) {
4021 const unsigned Opcode = Inst.
getOpcode();
4027 auto getVRegIdx = [&](unsigned,
unsigned OperandIdx) {
4028 const MCOperand &Opr = Inst.
getOperand(OperandIdx);
4037 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
4038 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
4039 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
4040 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
4041 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
4042 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
4046 for (
auto OpName : {OpName::src0X, OpName::src0Y}) {
4047 int I = getNamedOperandIdx(Opcode, OpName);
4051 int64_t
Imm =
Op.getImm();
4057 for (
auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4058 OpName::vsrc2Y, OpName::imm}) {
4059 int I = getNamedOperandIdx(Opcode, OpName);
4069 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4070 getVRegIdx, *
TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4072 return InvalidCompOprIdx;
4075bool AMDGPUAsmParser::validateVOPD(
const MCInst &Inst,
4082 for (
const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4083 AMDGPUOperand &
Op = (AMDGPUOperand &)*Operand;
4084 if ((
Op.isRegKind() ||
Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4086 Error(
Op.getStartLoc(),
"ABS not allowed in VOPD3 instructions");
4090 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4091 if (!InvalidCompOprIdx.has_value())
4094 auto CompOprIdx = *InvalidCompOprIdx;
4097 std::max(InstInfo[
VOPD::X].getIndexInParsedOperands(CompOprIdx),
4098 InstInfo[
VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4099 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4101 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4102 if (CompOprIdx == VOPD::Component::DST) {
4104 Error(Loc,
"dst registers must be distinct");
4106 Error(Loc,
"one dst register must be even and the other odd");
4108 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4109 Error(Loc, Twine(
"src") + Twine(CompSrcIdx) +
4110 " operands must use different VGPR banks");
4118bool AMDGPUAsmParser::tryVOPD3(
const MCInst &Inst) {
4120 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst,
false);
4121 if (!InvalidCompOprIdx.has_value())
4125 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst,
true);
4126 if (InvalidCompOprIdx.has_value()) {
4131 if (*InvalidCompOprIdx == VOPD::Component::DST)
4144bool AMDGPUAsmParser::tryVOPD(
const MCInst &Inst) {
4145 const unsigned Opcode = Inst.
getOpcode();
4160 for (
auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4161 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4162 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4163 int I = getNamedOperandIdx(Opcode, OpName);
4170 return !tryVOPD3(Inst);
4175bool AMDGPUAsmParser::tryAnotherVOPDEncoding(
const MCInst &Inst) {
4176 const unsigned Opcode = Inst.
getOpcode();
4181 return tryVOPD(Inst);
4182 return tryVOPD3(Inst);
4185bool AMDGPUAsmParser::validateIntClampSupported(
const MCInst &Inst) {
4191 int ClampIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::clamp);
4202bool AMDGPUAsmParser::validateMIMGDataSize(
const MCInst &Inst,
SMLoc IDLoc) {
4210 int VDataIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
4211 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4212 int TFEIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::tfe);
4220 unsigned VDataSize = getRegOperandSize(
Desc, VDataIdx);
4221 unsigned TFESize = (TFEIdx != -1 && Inst.
getOperand(TFEIdx).
getImm()) ? 1 : 0;
4226 bool IsPackedD16 =
false;
4230 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4231 IsPackedD16 = D16Idx >= 0;
4236 if ((VDataSize / 4) ==
DataSize + TFESize)
4241 Modifiers = IsPackedD16 ?
"dmask and d16" :
"dmask";
4243 Modifiers = IsPackedD16 ?
"dmask, d16 and tfe" :
"dmask and tfe";
4245 Error(IDLoc,
Twine(
"image data size does not match ") + Modifiers);
4249bool AMDGPUAsmParser::validateMIMGAddrSize(
const MCInst &Inst, SMLoc IDLoc) {
4258 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4260 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
4262 ? AMDGPU::OpName::srsrc
4263 : AMDGPU::OpName::rsrc;
4264 int SrsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RSrcOpName);
4265 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4266 int A16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::a16);
4270 assert(SrsrcIdx > VAddr0Idx);
4273 if (BaseOpcode->
BVH) {
4274 if (IsA16 == BaseOpcode->
A16)
4276 Error(IDLoc,
"image address size does not match a16");
4282 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4283 unsigned ActualAddrSize =
4284 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(
Desc, VAddr0Idx) / 4;
4286 unsigned ExpectedAddrSize =
4290 if (hasPartialNSAEncoding() &&
4293 int VAddrLastIdx = SrsrcIdx - 1;
4294 unsigned VAddrLastSize = getRegOperandSize(
Desc, VAddrLastIdx) / 4;
4296 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4299 if (ExpectedAddrSize > 12)
4300 ExpectedAddrSize = 16;
4305 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4309 if (ActualAddrSize == ExpectedAddrSize)
4312 Error(IDLoc,
"image address size does not match dim and a16");
4316bool AMDGPUAsmParser::validateMIMGAtomicDMask(
const MCInst &Inst) {
4323 if (!
Desc.mayLoad() || !
Desc.mayStore())
4326 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4333 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4336bool AMDGPUAsmParser::validateMIMGGatherDMask(
const MCInst &Inst) {
4344 int DMaskIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dmask);
4352 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4355bool AMDGPUAsmParser::validateMIMGDim(
const MCInst &Inst,
4370 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
4371 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4378bool AMDGPUAsmParser::validateMIMGMSAA(
const MCInst &Inst) {
4386 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4389 if (!BaseOpcode->
MSAA)
4392 int DimIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dim);
4398 return DimInfo->
MSAA;
4404 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4405 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4406 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4416bool AMDGPUAsmParser::validateMovrels(
const MCInst &Inst,
4425 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4428 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4436 Error(getOperandLoc(Operands, Src0Idx),
"source operand must be a VGPR");
4440bool AMDGPUAsmParser::validateMAIAccWrite(
const MCInst &Inst,
4445 if (
Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4448 const int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4451 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
4458 Error(getOperandLoc(Operands, Src0Idx),
4459 "source operand must be either a VGPR or an inline constant");
4466bool AMDGPUAsmParser::validateMAISrc2(
const MCInst &Inst,
4469 const MCInstrDesc &
Desc = MII.
get(Opcode);
4472 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4475 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4479 if (Inst.
getOperand(Src2Idx).
isImm() && isInlineConstant(Inst, Src2Idx)) {
4480 Error(getOperandLoc(Operands, Src2Idx),
4481 "inline constants are not allowed for this operand");
4488bool AMDGPUAsmParser::validateMFMA(
const MCInst &Inst,
4496 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
4497 if (BlgpIdx != -1) {
4498 if (
const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(
Opc)) {
4499 int CbszIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
4509 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
4510 Error(getOperandLoc(Operands, Src0Idx),
4511 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4516 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
4517 Error(getOperandLoc(Operands, Src1Idx),
4518 "wrong register tuple size for blgp value " + Twine(BLGP));
4526 const int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
4530 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
4534 MCRegister Src2Reg = Src2.
getReg();
4536 if (Src2Reg == DstReg)
4541 .getSizeInBits() <= 128)
4544 if (
TRI->regsOverlap(Src2Reg, DstReg)) {
4545 Error(getOperandLoc(Operands, Src2Idx),
4546 "source 2 operand must not partially overlap with dst");
// Validates the source modifiers of a V_DIV_SCALE_{F32,F64} instruction.
// Only the opcode case labels and the header of the modifier loop are visible
// in this excerpt; the per-operand check and the return statements are elided.
4553bool AMDGPUAsmParser::validateDivScale(
const MCInst &Inst) {
4557 case V_DIV_SCALE_F32_gfx6_gfx7:
4558 case V_DIV_SCALE_F32_vi:
4559 case V_DIV_SCALE_F32_gfx10:
4560 case V_DIV_SCALE_F64_gfx6_gfx7:
4561 case V_DIV_SCALE_F64_vi:
4562 case V_DIV_SCALE_F64_gfx10:
// Visit each of the three source-modifier operands exactly once. Listing
// src2_modifiers twice would leave src1_modifiers unchecked.
4568 for (
auto Name : {AMDGPU::OpName::src0_modifiers,
4569 AMDGPU::OpName::src1_modifiers,
4570 AMDGPU::OpName::src2_modifiers}) {
4581bool AMDGPUAsmParser::validateMIMGD16(
const MCInst &Inst) {
4589 int D16Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::d16);
4598bool AMDGPUAsmParser::validateTensorR128(
const MCInst &Inst) {
4605 int R128Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::r128);
4613 case AMDGPU::V_SUBREV_F32_e32:
4614 case AMDGPU::V_SUBREV_F32_e64:
4615 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4616 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4617 case AMDGPU::V_SUBREV_F32_e32_vi:
4618 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4619 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4620 case AMDGPU::V_SUBREV_F32_e64_vi:
4622 case AMDGPU::V_SUBREV_CO_U32_e32:
4623 case AMDGPU::V_SUBREV_CO_U32_e64:
4624 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4625 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4627 case AMDGPU::V_SUBBREV_U32_e32:
4628 case AMDGPU::V_SUBBREV_U32_e64:
4629 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4630 case AMDGPU::V_SUBBREV_U32_e32_vi:
4631 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4632 case AMDGPU::V_SUBBREV_U32_e64_vi:
4634 case AMDGPU::V_SUBREV_U32_e32:
4635 case AMDGPU::V_SUBREV_U32_e64:
4636 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4637 case AMDGPU::V_SUBREV_U32_e32_vi:
4638 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4639 case AMDGPU::V_SUBREV_U32_e64_vi:
4641 case AMDGPU::V_SUBREV_F16_e32:
4642 case AMDGPU::V_SUBREV_F16_e64:
4643 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4644 case AMDGPU::V_SUBREV_F16_e32_vi:
4645 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4646 case AMDGPU::V_SUBREV_F16_e64_vi:
4648 case AMDGPU::V_SUBREV_U16_e32:
4649 case AMDGPU::V_SUBREV_U16_e64:
4650 case AMDGPU::V_SUBREV_U16_e32_vi:
4651 case AMDGPU::V_SUBREV_U16_e64_vi:
4653 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4654 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4655 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4657 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4658 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4660 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4661 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4663 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4664 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4666 case AMDGPU::V_LSHRREV_B32_e32:
4667 case AMDGPU::V_LSHRREV_B32_e64:
4668 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4669 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4670 case AMDGPU::V_LSHRREV_B32_e32_vi:
4671 case AMDGPU::V_LSHRREV_B32_e64_vi:
4672 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4673 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4675 case AMDGPU::V_ASHRREV_I32_e32:
4676 case AMDGPU::V_ASHRREV_I32_e64:
4677 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4678 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4679 case AMDGPU::V_ASHRREV_I32_e32_vi:
4680 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4681 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4682 case AMDGPU::V_ASHRREV_I32_e64_vi:
4684 case AMDGPU::V_LSHLREV_B32_e32:
4685 case AMDGPU::V_LSHLREV_B32_e64:
4686 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4687 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4688 case AMDGPU::V_LSHLREV_B32_e32_vi:
4689 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4690 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4691 case AMDGPU::V_LSHLREV_B32_e64_vi:
4693 case AMDGPU::V_LSHLREV_B16_e32:
4694 case AMDGPU::V_LSHLREV_B16_e64:
4695 case AMDGPU::V_LSHLREV_B16_e32_vi:
4696 case AMDGPU::V_LSHLREV_B16_e64_vi:
4697 case AMDGPU::V_LSHLREV_B16_gfx10:
4699 case AMDGPU::V_LSHRREV_B16_e32:
4700 case AMDGPU::V_LSHRREV_B16_e64:
4701 case AMDGPU::V_LSHRREV_B16_e32_vi:
4702 case AMDGPU::V_LSHRREV_B16_e64_vi:
4703 case AMDGPU::V_LSHRREV_B16_gfx10:
4705 case AMDGPU::V_ASHRREV_I16_e32:
4706 case AMDGPU::V_ASHRREV_I16_e64:
4707 case AMDGPU::V_ASHRREV_I16_e32_vi:
4708 case AMDGPU::V_ASHRREV_I16_e64_vi:
4709 case AMDGPU::V_ASHRREV_I16_gfx10:
4711 case AMDGPU::V_LSHLREV_B64_e64:
4712 case AMDGPU::V_LSHLREV_B64_gfx10:
4713 case AMDGPU::V_LSHLREV_B64_vi:
4715 case AMDGPU::V_LSHRREV_B64_e64:
4716 case AMDGPU::V_LSHRREV_B64_gfx10:
4717 case AMDGPU::V_LSHRREV_B64_vi:
4719 case AMDGPU::V_ASHRREV_I64_e64:
4720 case AMDGPU::V_ASHRREV_I64_gfx10:
4721 case AMDGPU::V_ASHRREV_I64_vi:
4723 case AMDGPU::V_PK_LSHLREV_B16:
4724 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4725 case AMDGPU::V_PK_LSHLREV_B16_vi:
4727 case AMDGPU::V_PK_LSHRREV_B16:
4728 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4729 case AMDGPU::V_PK_LSHRREV_B16_vi:
4730 case AMDGPU::V_PK_ASHRREV_I16:
4731 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4732 case AMDGPU::V_PK_ASHRREV_I16_vi:
4739bool AMDGPUAsmParser::validateLdsDirect(
const MCInst &Inst,
4741 using namespace SIInstrFlags;
4742 const unsigned Opcode = Inst.
getOpcode();
4743 const MCInstrDesc &
Desc = MII.
get(Opcode);
4748 if ((
Desc.TSFlags & Enc) == 0)
4751 for (
auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4752 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4756 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4759 Error(getOperandLoc(Operands, SrcIdx),
4760 "lds_direct is not supported on this GPU");
4765 Error(getOperandLoc(Operands, SrcIdx),
4766 "lds_direct cannot be used with this instruction");
4770 if (SrcName != OpName::src0) {
4771 Error(getOperandLoc(Operands, SrcIdx),
4772 "lds_direct may be used as src0 only");
4781SMLoc AMDGPUAsmParser::getFlatOffsetLoc(
const OperandVector &Operands)
const {
4782 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
4783 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4784 if (
Op.isFlatOffset())
4785 return Op.getStartLoc();
4790bool AMDGPUAsmParser::validateOffset(
const MCInst &Inst,
4793 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4799 return validateFlatOffset(Inst, Operands);
4802 return validateSMEMOffset(Inst, Operands);
4808 const unsigned OffsetSize = 24;
4809 if (!
isUIntN(OffsetSize - 1,
Op.getImm())) {
4810 Error(getFlatOffsetLoc(Operands),
4811 Twine(
"expected a ") + Twine(OffsetSize - 1) +
4812 "-bit unsigned offset for buffer ops");
4816 const unsigned OffsetSize = 16;
4817 if (!
isUIntN(OffsetSize,
Op.getImm())) {
4818 Error(getFlatOffsetLoc(Operands),
4819 Twine(
"expected a ") + Twine(OffsetSize) +
"-bit unsigned offset");
4826bool AMDGPUAsmParser::validateFlatOffset(
const MCInst &Inst,
4833 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4837 if (!hasFlatOffsets() &&
Op.getImm() != 0) {
4838 Error(getFlatOffsetLoc(Operands),
4839 "flat offset modifier is not supported on this GPU");
4846 bool AllowNegative =
4849 if (!
isIntN(OffsetSize,
Op.getImm()) || (!AllowNegative &&
Op.getImm() < 0)) {
4850 Error(getFlatOffsetLoc(Operands),
4851 Twine(
"expected a ") +
4852 (AllowNegative ? Twine(OffsetSize) +
"-bit signed offset"
4853 : Twine(OffsetSize - 1) +
"-bit unsigned offset"));
4860SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(
const OperandVector &Operands)
const {
4862 for (
unsigned i = 2, e = Operands.
size(); i != e; ++i) {
4863 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
4864 if (
Op.isSMEMOffset() ||
Op.isSMEMOffsetMod())
4865 return Op.getStartLoc();
4870bool AMDGPUAsmParser::validateSMEMOffset(
const MCInst &Inst,
4880 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4894 Error(getSMEMOffsetLoc(Operands),
4896 ?
"expected a 23-bit unsigned offset for buffer ops"
4897 :
isGFX12Plus() ?
"expected a 24-bit signed offset"
4898 : (
isVI() || IsBuffer) ?
"expected a 20-bit unsigned offset"
4899 :
"expected a 21-bit signed offset");
4904bool AMDGPUAsmParser::validateSOPLiteral(
const MCInst &Inst,
4907 const MCInstrDesc &
Desc = MII.
get(Opcode);
4911 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4912 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4914 const int OpIndices[] = { Src0Idx, Src1Idx };
4916 unsigned NumExprs = 0;
4917 unsigned NumLiterals = 0;
4920 for (
int OpIdx : OpIndices) {
4921 if (
OpIdx == -1)
break;
4927 std::optional<int64_t>
Imm;
4930 }
else if (MO.
isExpr()) {
4939 if (!
Imm.has_value()) {
4941 }
else if (!isInlineConstant(Inst,
OpIdx)) {
4945 if (NumLiterals == 0 || LiteralValue !=
Value) {
4953 if (NumLiterals + NumExprs <= 1)
4956 Error(getOperandLoc(Operands, Src1Idx),
4957 "only one unique literal operand is allowed");
4961bool AMDGPUAsmParser::validateOpSel(
const MCInst &Inst) {
4964 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4974 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4975 if (OpSelIdx != -1) {
4979 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
4980 if (OpSelHiIdx != -1) {
4989 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
4999 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
5000 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
5001 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
5002 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
5004 const MCOperand &Src0 = Inst.
getOperand(Src0Idx);
5005 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
5011 auto VerifyOneSGPR = [
OpSel, OpSelHi](
unsigned Index) ->
bool {
5013 return ((OpSel & Mask) == 0) && ((OpSelHi &
Mask) == 0);
5023 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
5024 if (Src2Idx != -1) {
5025 const MCOperand &Src2 = Inst.
getOperand(Src2Idx);
5035bool AMDGPUAsmParser::validateTrue16OpSel(
const MCInst &Inst) {
5036 if (!hasTrue16Insts())
5038 const MCRegisterInfo *MRI = getMRI();
5040 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
5046 if (OpSelOpValue == 0)
5048 unsigned OpCount = 0;
5049 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5050 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5051 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), OpName);
5058 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5059 if (OpSelOpIsHi != VGPRSuffixIsHi)
5068bool AMDGPUAsmParser::validateNeg(
const MCInst &Inst, AMDGPU::OpName OpName) {
5069 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5082 int NegIdx = AMDGPU::getNamedOperandIdx(
Opc, OpName);
5093 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5094 AMDGPU::OpName::src1_modifiers,
5095 AMDGPU::OpName::src2_modifiers};
5097 for (
unsigned i = 0; i < 3; ++i) {
5107bool AMDGPUAsmParser::validateDPP(
const MCInst &Inst,
5110 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp_ctrl);
5111 if (DppCtrlIdx >= 0) {
5118 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5119 Error(S,
isGFX12() ?
"DP ALU dpp only supports row_share"
5120 :
"DP ALU dpp only supports row_newbcast");
5125 int Dpp8Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::dpp8);
5126 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5129 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
5131 const MCOperand &Src1 = Inst.
getOperand(Src1Idx);
5134 Error(getOperandLoc(Operands, Src1Idx),
5135 "invalid operand for instruction");
5139 Error(getInstLoc(Operands),
5140 "src1 immediate operand invalid for instruction");
5150bool AMDGPUAsmParser::validateVccOperand(MCRegister
Reg)
const {
5151 return (
Reg == AMDGPU::VCC && isWave64()) ||
5152 (
Reg == AMDGPU::VCC_LO && isWave32());
// Validates literal (non-inline constant) operands of Inst and emits its own
// diagnostics.
//
// Visible rules:
//  * forced / 64-bit literals in unsupported positions are reported as
//    "invalid operand for instruction";
//  * a literal without FeatureVOP3Literal, on an opcode with no mandatory
//    literal, is reported as "literal operands are not supported";
//  * a second literal after one has been recorded is reported as
//    "only one unique literal operand is allowed".
5156bool AMDGPUAsmParser::validateVOPLiteral(
const MCInst &Inst,
5159 const MCInstrDesc &
Desc = MII.
get(Opcode);
// An opcode with a named `imm` operand always carries a literal.
5160 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5162 !HasMandatoryLiteral && !
isVOPD(Opcode))
// Index of the operand that supplied the literal recorded so far, if any.
5167 std::optional<unsigned> LiteralOpIdx;
5170 for (
int OpIdx : OpIndices) {
5180 std::optional<int64_t>
Imm;
5186 bool IsAnotherLiteral =
false;
// Whether the operand was written with an explicit literal modifier.
5187 bool IsForcedLit = findMCOperand(Operands,
OpIdx).isForcedLit();
5188 bool IsForcedLit64 = findMCOperand(Operands,
OpIdx).isForcedLit64();
// An operand with no immediate value in Imm always counts as a literal.
5189 if (!
Imm.has_value()) {
5191 IsAnotherLiteral =
true;
5192 }
else if (IsForcedLit || IsForcedLit64 || !isInlineConstant(Inst,
OpIdx)) {
5197 HasMandatoryLiteral);
5198 unsigned OpTy =
Desc.operands()[
OpIdx].OperandType;
5208 (IsForcedLit64 && !HasMandatoryLiteral)) &&
5209 (!has64BitLiterals() ||
Desc.getSize() != 4)) {
5211 "invalid operand for instruction");
// This check applies only to operands other than src0.
5216 if (!IsForcedFP64 && (IsForcedLit64 || !IsValid32Op) &&
5217 OpIdx != getNamedOperandIdx(Opcode, OpName::src0)) {
5219 "invalid operand for instruction");
5223 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5230 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5231 !getFeatureBits()[FeatureVOP3Literal]) {
5233 "literal operands are not supported");
// Report at whichever of the two conflicting operands comes later.
5237 if (LiteralOpIdx && IsAnotherLiteral) {
5238 Error(getLaterLoc(getOperandLoc(Operands,
OpIdx),
5239 getOperandLoc(Operands, *LiteralOpIdx)),
5240 "only one unique literal operand is allowed");
// Record this operand as the one that supplied the literal.
5244 if (IsAnotherLiteral)
5245 LiteralOpIdx =
OpIdx;
// Checks AGPR usage of the data and destination operands of a load/store.
//
// IsAGPROperand yields an int per operand; the `< 0` / `>= 0` tests below
// treat a negative value as a distinct case
// (NOTE(review): presumably "operand absent" - confirm against IsAGPROperand).
//
// With FeatureGFX90AInsts the visible result is `DstAreg == DataAreg`.
// Without it the result is `DstAreg < 1 && DataAreg < 1`.
// validateInstruction emits the diagnostic when this returns false.
5268bool AMDGPUAsmParser::validateAGPRLdSt(
const MCInst &Inst)
const {
5276 ? AMDGPU::OpName::data0
5277 : AMDGPU::OpName::vdata;
5279 const MCRegisterInfo *MRI = getMRI();
5280 int DstAreg =
IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
// A present data1 operand is compared against the first data operand.
5284 int Data2Areg =
IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5285 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5289 auto FB = getFeatureBits();
5290 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5291 if (DataAreg < 0 || DstAreg < 0)
5293 return DstAreg == DataAreg;
5296 return DstAreg < 1 && DataAreg < 1;
// Checks register alignment for targets with FeatureRequiresAlignedVGPRs.
//
// The feature is tested first. Special handling is visible for
// DS_READ_B96_TR_B6_vi on GFX90A and for the *_LOAD_TR6_B96 opcodes on
// GFX1250. validateInstruction reports "invalid register class: vgpr tuples
// must be 64 bit aligned" when this returns false.
5299bool AMDGPUAsmParser::validateVGPRAlign(
const MCInst &Inst)
const {
5300 auto FB = getFeatureBits();
5301 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5305 const MCRegisterInfo *MRI = getMRI();
5308 if (FB[AMDGPU::FeatureGFX90AInsts] &&
Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5311 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5315 case AMDGPU::DS_LOAD_TR6_B96:
5316 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5320 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5321 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5325 int VAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
5326 if (VAddrIdx != -1) {
// Tests whether the register's offset from VGPR0 is odd.
5329 if ((
Sub - AMDGPU::VGPR0) & 1)
5334 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5335 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
// Register classes for 32-bit VGPRs and AGPRs.
5340 const MCRegisterClass &VGPR32 = MRI->
getRegClass(AMDGPU::VGPR_32RegClassID);
5341 const MCRegisterClass &AGPR32 = MRI->
getRegClass(AMDGPU::AGPR_32RegClassID);
// Scans the parsed operands, starting at index 1, and returns the start
// location of the matching operand. The match condition is on a line not
// shown here (NOTE(review): presumably the blgp immediate - confirm).
5360SMLoc AMDGPUAsmParser::getBLGPLoc(
const OperandVector &Operands)
const {
5361 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
5362 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
5364 return Op.getStartLoc();
// Checks that the blgp operand was spelled with the modifier name that the
// opcode expects ("neg:" versus blgp) and emits its own diagnostic.
//
// IsNeg records whether the source text at the blgp location starts with
// "neg:". On GFX940 the listed F64 MFMA opcodes are special-cased. The two
// flags are compared, and on mismatch the message names whichever spelling
// is unsupported.
5369bool AMDGPUAsmParser::validateBLGP(
const MCInst &Inst,
5372 int BlgpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
5375 SMLoc BLGPLoc = getBLGPLoc(Operands);
// Inspect the raw source text to see which spelling the user wrote.
5378 bool IsNeg = StringRef(BLGPLoc.
getPointer()).starts_with(
"neg:");
5379 auto FB = getFeatureBits();
5380 bool UsesNeg =
false;
5381 if (FB[AMDGPU::FeatureGFX940Insts]) {
5383 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5384 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5385 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5386 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
// The spelling used matches the one expected.
5391 if (IsNeg == UsesNeg)
5395 UsesNeg ?
"invalid modifier: blgp is not supported"
5396 :
"invalid modifier: neg is not supported");
// Validates the register operand of the GFX11 S_WAITCNT_{EXPCNT,LGKMCNT,
// VMCNT,VSCNT} opcodes: the register is compared against SGPR_NULL, and the
// error "src0 must be null" is emitted otherwise.
//
// NOTE(review): the index is stored in `Src0Idx` and the message says "src0",
// but the operand looked up is OpName::sdst - confirm the naming is intended.
5401bool AMDGPUAsmParser::validateWaitCnt(
const MCInst &Inst,
5407 if (
Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5408 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5409 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5410 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5413 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
5416 if (
Reg == AMDGPU::SGPR_NULL)
5419 Error(getOperandLoc(Operands, Src0Idx),
"src0 must be null");
// Validates DS instructions and emits its own diagnostics.
//
// One visible path delegates to validateGWS. Another looks up the gds operand
// and reports "gds modifier is not supported on this GPU" at the location of
// the gds modifier.
5423bool AMDGPUAsmParser::validateDS(
const MCInst &Inst,
5429 return validateGWS(Inst, Operands);
5434 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::gds);
5439 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5440 Error(S,
"gds modifier is not supported on this GPU");
// Validates the data0 register of the DS_GWS_INIT / DS_GWS_BARRIER /
// DS_GWS_SEMA_BR (_vi) opcodes when FeatureGFX90AInsts is set.
//
// The register index is computed relative to VGPR0 or AGPR0, depending on
// whether Reg is in the VGPR_32 class. A failure is reported as
// "vgpr must be even aligned" at the data0 operand.
5448bool AMDGPUAsmParser::validateGWS(
const MCInst &Inst,
5450 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5454 if (
Opc != AMDGPU::DS_GWS_INIT_vi &&
Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5455 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5458 const MCRegisterInfo *MRI = getMRI();
5459 const MCRegisterClass &VGPR32 = MRI->
getRegClass(AMDGPU::VGPR_32RegClassID);
5461 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::data0);
// Index of Reg within its register file (VGPR or AGPR).
5464 auto RegIdx =
Reg - (VGPR32.
contains(
Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5466 Error(getOperandLoc(Operands, Data0Pos),
"vgpr must be even aligned");
// Validates the cache-policy (cpol) operand of Inst and emits its own
// diagnostics.
//
// Visible diagnostics cover:
//  * scale_offset and nv not being supported on the GPU or instruction;
//  * cache policy on SMRD / SMEM instructions;
//  * the scc modifier on instructions without AllowSCCModifier;
//  * instructions that must, or must not, use glc.
// One visible path hands off to validateTHAndScopeBits.
5473bool AMDGPUAsmParser::validateCoherencyBits(
const MCInst &Inst,
5476 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.
getOpcode(),
5477 AMDGPU::OpName::cpol);
5485 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5488 Error(S,
"scale_offset is not supported on this GPU");
5491 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5494 Error(S,
"nv is not supported on this GPU");
5499 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5502 Error(S,
"scale_offset is not supported for this instruction");
// TH/scope validation is handled by a separate routine.
5506 return validateTHAndScopeBits(Inst, Operands, CPol);
5511 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5512 Error(S,
"cache policy is not supported for SMRD instructions");
5516 Error(IDLoc,
"invalid cache policy for SMEM instruction");
5525 if (!(TSFlags & AllowSCCModifier)) {
5526 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5530 "scc modifier is not supported for this instruction on this GPU");
5541 :
"instruction must use glc");
5546 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
// Locate the offending modifier text: "sc0" on GFX940, "glc" otherwise.
5549 &CStr.data()[CStr.find(
isGFX940() ?
"sc0" :
"glc")]);
5551 :
"instruction must not use glc");
// Validates the th (temporal hint) and scope parts of the cache-policy value
// CPol for Inst.
//
// Every visible failure path returns through PrintError with a specific
// message: TH_ATOMIC_RETURN requirements, SMEM th values, scope/th
// combinations, and th values for atomic, store and load instructions.
5559bool AMDGPUAsmParser::validateTHAndScopeBits(
const MCInst &Inst,
5561 const unsigned CPol) {
5565 const unsigned Opcode = Inst.
getOpcode();
5566 const MCInstrDesc &TID = MII.
get(Opcode);
// Location of the cache-policy operand as written in the source.
5569 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5576 return PrintError(
"th:TH_ATOMIC_RETURN requires a destination operand");
5581 return PrintError(
"instruction must use th:TH_ATOMIC_RETURN");
5589 return PrintError(
"invalid th value for SMEM instruction");
5596 return PrintError(
"scope and th combination is not valid");
5602 return PrintError(
"invalid th value for atomic instructions");
5605 return PrintError(
"invalid th value for store instructions");
5608 return PrintError(
"invalid th value for load instructions");
// Rejects the TFE modifier on store instructions with the diagnostic
// "TFE modifier has no meaning for store instructions".
//
// The error is emitted only when the location returned for the TFE operand
// differs from the instruction location.
// NOTE(review): presumably getImmLoc falls back to the instruction location
// when the modifier was not written - confirm against getImmLoc.
5614bool AMDGPUAsmParser::validateTFE(
const MCInst &Inst,
5617 if (
Desc.mayStore() &&
5619 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5620 if (Loc != getInstLoc(Operands)) {
5621 Error(Loc,
"TFE modifier has no meaning for store instructions");
// Validates the matrix format operands of WMMA instructions and emits its own
// diagnostics.
//
// The A and B formats are each checked against src0 and src1 by the local
// lambda validateFmt, which reports "wrong register tuple size for ...". When
// the opcode has matrix scale-format operands, the combination is checked and
// reported as "invalid matrix and scale format combination".
5629bool AMDGPUAsmParser::validateWMMA(
const MCInst &Inst,
5635 int AFmtIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_fmt);
5639 int BFmtIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_fmt);
// Checks one format value against the named source operand.
5642 auto validateFmt = [&](
unsigned Fmt, AMDGPU::OpName SrcOp) ->
bool {
5643 int SrcIdx = AMDGPU::getNamedOperandIdx(
Opc, SrcOp);
5651 Error(getOperandLoc(Operands, SrcIdx),
5652 "wrong register tuple size for " +
5657 if (!validateFmt(AFmt, AMDGPU::OpName::src0) ||
5658 !validateFmt(BFmt, AMDGPU::OpName::src1))
5662 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale_fmt);
5663 if (AScaleIdx == -1)
5667 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale_fmt);
// NOTE(review): this scale-format diagnostic is anchored at the matrix A
// format operand (ImmTyMatrixAFMT) - confirm that is the intended location.
5670 Error(getImmLoc(AMDGPUOperand::ImmTyMatrixAFMT, Operands),
5671 "invalid matrix and scale format combination");
// Runs the target-specific validate* checks on a matched instruction.
//
// Inst:     the matched instruction.
// IDLoc:    location of the instruction, used for diagnostics that have no
//           better anchor.
//
// Two patterns are visible below. Checks called with only Inst have their
// diagnostic emitted here. Checks that also take Operands and/or IDLoc show
// no Error call at this level
// (NOTE(review): presumably they report their own errors - confirm).
// matchAndEmitInstruction tests the result as `!validateInstruction(...)`.
5678bool AMDGPUAsmParser::validateInstruction(
const MCInst &Inst, SMLoc IDLoc,
5680 if (!validateLdsDirect(Inst, Operands))
5682 if (!validateTrue16OpSel(Inst)) {
5683 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5684 "op_sel operand conflicts with 16-bit operand suffix");
5687 if (!validateSOPLiteral(Inst, Operands))
5689 if (!validateVOPLiteral(Inst, Operands)) {
5692 if (!validateConstantBusLimitations(Inst, Operands)) {
5695 if (!validateVOPD(Inst, Operands)) {
5698 if (!validateIntClampSupported(Inst)) {
5699 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5700 "integer clamping is not supported on this GPU");
5703 if (!validateOpSel(Inst)) {
5704 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5705 "invalid op_sel operand");
5708 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5709 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5710 "invalid neg_lo operand");
5713 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5714 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5715 "invalid neg_hi operand");
5718 if (!validateDPP(Inst, Operands)) {
5722 if (!validateMIMGD16(Inst)) {
5723 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5724 "d16 modifier is not supported on this GPU");
5727 if (!validateMIMGDim(Inst, Operands)) {
5728 Error(IDLoc,
"missing dim operand");
// NOTE(review): the r128 diagnostic below is anchored at the d16 operand
// location (ImmTyD16) - confirm whether an r128 location was intended.
5731 if (!validateTensorR128(Inst)) {
5732 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5733 "instruction must set modifier r128=0");
5736 if (!validateMIMGMSAA(Inst)) {
5737 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5738 "invalid dim; must be MSAA type");
5741 if (!validateMIMGDataSize(Inst, IDLoc)) {
5744 if (!validateMIMGAddrSize(Inst, IDLoc))
5746 if (!validateMIMGAtomicDMask(Inst)) {
5747 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5748 "invalid atomic image dmask");
5751 if (!validateMIMGGatherDMask(Inst)) {
5752 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5753 "invalid image_gather dmask: only one bit must be set");
5756 if (!validateMovrels(Inst, Operands)) {
5759 if (!validateOffset(Inst, Operands)) {
5762 if (!validateMAIAccWrite(Inst, Operands)) {
5765 if (!validateMAISrc2(Inst, Operands)) {
5768 if (!validateMFMA(Inst, Operands)) {
5771 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
// The AGPR load/store message depends on whether the target has GFX90A insts.
5775 if (!validateAGPRLdSt(Inst)) {
5776 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5777 ?
"invalid register class: data and dst should be all VGPR or AGPR"
5778 :
"invalid register class: agpr loads and stores not supported on this GPU"
5782 if (!validateVGPRAlign(Inst)) {
5784 "invalid register class: vgpr tuples must be 64 bit aligned");
5787 if (!validateDS(Inst, Operands)) {
5791 if (!validateBLGP(Inst, Operands)) {
5795 if (!validateDivScale(Inst)) {
5796 Error(IDLoc,
"ABS not allowed in VOP3B instructions");
5799 if (!validateWaitCnt(Inst, Operands)) {
5802 if (!validateTFE(Inst, Operands)) {
5805 if (!validateWMMA(Inst, Operands)) {
5814 unsigned VariantID = 0);
5818 unsigned VariantID);
5820bool AMDGPUAsmParser::isSupportedMnemo(
StringRef Mnemo,
// Overload of isSupportedMnemo that takes a list of variants: it iterates
// over each entry of Variants for the mnemonic Mnemo under feature set FBS.
// The per-variant check is on lines not shown here.
5825bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5826 const FeatureBitset &FBS,
5827 ArrayRef<unsigned> Variants) {
5828 for (
auto Variant : Variants) {
// Produces the most specific diagnostic available for a mnemonic that failed
// to match.
//
// Visible order of checks:
//  1. The mnemonic is supported for the matched variants under the current
//     features (the action taken is on a line not shown here).
//  2. Pending parser errors are cleared.
//  3. It is supported under some other variant:
//     "... variant of this instruction is not supported".
//  4. On GFX10+ with wave64 only, it would be supported with the wave-size
//     features flipped: "instruction requires wavesize=32".
//  5. It is supported with all features set:
//     "instruction not supported on this GPU (<cpu>): <mnemonic>".
//  6. Otherwise: "invalid instruction" plus a suggestion.
5836bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5838 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5841 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5846 getParser().clearPendingErrors();
5850 StringRef VariantName = getMatchedVariantName();
5851 if (!VariantName.
empty() && isSupportedMnemo(Mnemo, FBS)) {
5854 " variant of this instruction is not supported"));
5858 if (
isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5859 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
// Build a copy of the feature set with the two wavefront-size bits flipped.
5861 FeatureBitset FeaturesWS32 = getFeatureBits();
5862 FeaturesWS32.
flip(AMDGPU::FeatureWavefrontSize64)
5863 .
flip(AMDGPU::FeatureWavefrontSize32);
5864 FeatureBitset AvailableFeaturesWS32 =
5865 ComputeAvailableFeatures(FeaturesWS32);
5867 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5868 return Error(IDLoc,
"instruction requires wavesize=32");
// Probe with every feature bit set to tell "unsupported here" from "unknown".
5872 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5873 return Error(IDLoc,
"instruction not supported on this GPU (" +
5874 getSTI().
getCPU() +
")" +
": " + Mnemo);
5879 return Error(IDLoc,
"invalid instruction" + Suggestion);
5885 const auto &
Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5886 if (
Op.isToken() && InvalidOprIdx > 1) {
5887 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5888 return PrevOp.isToken() && PrevOp.getToken() ==
"::";
// Matches the parsed operands against each candidate variant, validates a
// successful match, and otherwise turns the best match result into a
// diagnostic.
//
// The condition that updates Result ranks outcomes: Match_Success and
// Match_MissingFeature always take over, Match_InvalidOperand does not
// replace Match_MissingFeature, and Match_MnemonicFail replaces neither of
// those two.
5893bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc,
unsigned &Opcode,
5896 uint64_t &ErrorInfo,
5897 bool MatchingInlineAsm) {
5900 unsigned Result = Match_Success;
5901 for (
auto Variant : getMatchedVariants()) {
5903 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5908 if (R == Match_Success || R == Match_MissingFeature ||
5909 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5910 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5911 Result != Match_MissingFeature)) {
5915 if (R == Match_Success)
// A successful match still has to pass the target-specific validation.
5919 if (Result == Match_Success) {
5920 if (!validateInstruction(Inst, IDLoc, Operands)) {
// Operands[0] is read as the mnemonic token.
5927 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
5928 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5934 case Match_MissingFeature:
5938 return Error(IDLoc,
"operands are not valid for this GPU or mode");
5940 case Match_InvalidOperand: {
5941 SMLoc ErrorLoc = IDLoc;
// ~0ULL means the matcher did not identify a specific operand.
5942 if (ErrorInfo != ~0ULL) {
5943 if (ErrorInfo >= Operands.
size()) {
5944 return Error(IDLoc,
"too few operands for instruction");
5946 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5947 if (ErrorLoc == SMLoc())
5951 return Error(ErrorLoc,
"invalid VOPDY instruction");
5953 return Error(ErrorLoc,
"invalid operand for instruction");
5956 case Match_MnemonicFail:
// Parses an absolute expression and stores it in Ret.
//
// The parsed value Tmp is converted with static_cast<uint32_t>.
// NOTE(review): if Tmp is wider than 32 bits the cast truncates; no range
// check is visible here - confirm.
5962bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5967 if (getParser().parseAbsoluteExpression(Tmp)) {
5970 Ret =
static_cast<uint32_t
>(Tmp);
// Handles the .amdgcn_target directive.
//
// It rejects non-amdgcn targets, parses the escaped target-id string, and
// reports an error when that string differs from the target streamer's
// target id (compared via toString()).
5974bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5975 if (!getSTI().getTargetTriple().isAMDGCN())
5976 return TokError(
"directive only supported for amdgcn architecture");
5978 std::string TargetIDDirective;
// Remember where the string starts so the diagnostic can cover its range.
5979 SMLoc TargetStart = getTok().getLoc();
5980 if (getParser().parseEscapedString(TargetIDDirective))
5983 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5984 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
5985 return getParser().Error(
5987 (Twine(
".amdgcn_target directive's target id ") +
5988 Twine(TargetIDDirective) +
5989 Twine(
" does not match the specified target id ") +
5990 Twine(getTargetStreamer().getTargetID()->
toString())));
5995bool AMDGPUAsmParser::OutOfRangeError(SMRange
Range) {
// Computes the granulated VGPR and SGPR block-count expressions for a kernel
// descriptor.
//
// Inputs:  feature bits, VCC / flat-scratch / XNACK usage, an optional
//          wave32 flag, and the next-free VGPR / SGPR expressions with their
//          source ranges.
// Outputs: VGPRBlocks and SGPRBlocks, written by reference.
//
// When the SGPR count evaluates to an absolute value above
// MaxAddressableNumSGPRs, the result of OutOfRangeError(SGPRRange) is
// returned. That check appears twice: once under
// `Version.Major >= 8 && !FeatureSGPRInitBug`, and once under
// `Version.Major <= 7 || FeatureSGPRInitBug` after ExtraSGPRs is introduced.
5999bool AMDGPUAsmParser::calculateGPRBlocks(
6000 const FeatureBitset &Features,
const MCExpr *VCCUsed,
6001 const MCExpr *FlatScrUsed,
bool XNACKUsed,
6002 std::optional<bool> EnableWavefrontSize32,
const MCExpr *NextFreeVGPR,
6003 SMRange VGPRRange,
const MCExpr *NextFreeSGPR, SMRange SGPRRange,
6004 const MCExpr *&VGPRBlocks,
const MCExpr *&SGPRBlocks) {
6010 const MCExpr *
NumSGPRs = NextFreeSGPR;
6011 int64_t EvaluatedSGPRs;
6018 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
Version.Major >= 8 &&
6019 !Features.
test(FeatureSGPRInitBug) &&
6020 static_cast<uint64_t
>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6021 return OutOfRangeError(SGPRRange);
6023 const MCExpr *ExtraSGPRs =
6027 if (
NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
6028 (
Version.Major <= 7 || Features.
test(FeatureSGPRInitBug)) &&
6029 static_cast<uint64_t
>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6030 return OutOfRangeError(SGPRRange);
6032 if (Features.
test(FeatureSGPRInitBug))
// Helper that builds the block-count expression from a register count and a
// granule size.
6039 auto GetNumGPRBlocks = [&Ctx](
const MCExpr *NumGPR,
6040 unsigned Granule) ->
const MCExpr * {
6044 const MCExpr *AlignToGPR =
6046 const MCExpr *DivGPR =
6052 VGPRBlocks = GetNumGPRBlocks(
// Parses an AMDHSA kernel descriptor block.
//
// It rejects non-amdgcn targets and non-amdhsa OSes, reads the kernel name,
// then consumes `.amdhsa_*` directives until `.end_amdhsa_kernel`. Each
// directive may appear once ("cannot be repeated") and is followed by an
// expression. Afterwards the required directives are checked, the granulated
// register block counts are computed, cross-field constraints are enforced,
// and the descriptor is emitted through the target streamer.
6061bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
6062 if (!getSTI().getTargetTriple().isAMDGCN())
6063 return TokError(
"directive only supported for amdgcn architecture");
6066 return TokError(
"directive only supported for amdhsa OS");
6068 StringRef KernelName;
6069 if (getParser().parseIdentifier(KernelName))
6072 AMDGPU::MCKernelDescriptor KD =
// Defaults for values that individual directives may override below.
6084 const MCExpr *NextFreeVGPR = ZeroExpr;
6086 const MCExpr *NamedBarCnt = ZeroExpr;
6087 uint64_t SharedVGPRCount = 0;
6088 uint64_t PreloadLength = 0;
6089 uint64_t PreloadOffset = 0;
6091 const MCExpr *NextFreeSGPR = ZeroExpr;
// Running total of user SGPRs implied by the user_sgpr_* directives seen.
6094 unsigned ImpliedUserSGPRCount = 0;
// Set only when .amdhsa_user_sgpr_count is given explicitly.
6098 std::optional<unsigned> ExplicitUserSGPRCount;
6099 const MCExpr *ReserveVCC = OneExpr;
6100 const MCExpr *ReserveFlatScr = OneExpr;
6101 std::optional<bool> EnableWavefrontSize32;
// Per-directive parsing: identifier first, then its value expression.
6107 SMRange IDRange = getTok().getLocRange();
6108 if (!parseId(
ID,
"expected .amdhsa_ directive or .end_amdhsa_kernel"))
6111 if (
ID ==
".end_amdhsa_kernel")
6115 return TokError(
".amdhsa_ directives cannot be repeated");
6117 SMLoc ValStart = getLoc();
6118 const MCExpr *ExprVal;
6119 if (getParser().parseExpression(ExprVal))
6121 SMLoc ValEnd = getLoc();
6122 SMRange ValRange = SMRange(ValStart, ValEnd);
6125 uint64_t Val = IVal;
// EvaluatableExpr records whether the expression folded to an absolute value.
6126 bool EvaluatableExpr;
6127 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6129 return OutOfRangeError(ValRange);
// Helper macros used by the directive chain below (removed again with #undef).
6133#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6134 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6135 return OutOfRangeError(RANGE); \
6136 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6141#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6143 return Error(IDRange.Start, "directive should have resolvable expression", \
6146 if (
ID ==
".amdhsa_group_segment_fixed_size") {
6149 return OutOfRangeError(ValRange);
6151 }
else if (
ID ==
".amdhsa_private_segment_fixed_size") {
6154 return OutOfRangeError(ValRange);
6156 }
else if (
ID ==
".amdhsa_kernarg_size") {
6158 return OutOfRangeError(ValRange);
6160 }
else if (
ID ==
".amdhsa_user_sgpr_count") {
6162 ExplicitUserSGPRCount = Val;
6163 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_buffer") {
6167 "directive is not supported with architected flat scratch",
6170 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6173 ImpliedUserSGPRCount += 4;
6174 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_length") {
6177 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6180 return OutOfRangeError(ValRange);
6184 ImpliedUserSGPRCount += Val;
6185 PreloadLength = Val;
6187 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_preload_offset") {
6190 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6193 return OutOfRangeError(ValRange);
6197 PreloadOffset = Val;
6198 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_ptr") {
6201 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6204 ImpliedUserSGPRCount += 2;
6205 }
else if (
ID ==
".amdhsa_user_sgpr_queue_ptr") {
6208 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6211 ImpliedUserSGPRCount += 2;
6212 }
else if (
ID ==
".amdhsa_user_sgpr_kernarg_segment_ptr") {
6215 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6218 ImpliedUserSGPRCount += 2;
6219 }
else if (
ID ==
".amdhsa_user_sgpr_dispatch_id") {
6222 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6225 ImpliedUserSGPRCount += 2;
6226 }
else if (
ID ==
".amdhsa_user_sgpr_flat_scratch_init") {
6229 "directive is not supported with architected flat scratch",
6233 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6236 ImpliedUserSGPRCount += 2;
6237 }
else if (
ID ==
".amdhsa_user_sgpr_private_segment_size") {
6240 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6243 ImpliedUserSGPRCount += 1;
6244 }
else if (
ID ==
".amdhsa_wavefront_size32") {
6246 if (IVersion.
Major < 10)
6247 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6248 EnableWavefrontSize32 = Val;
6250 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6252 }
else if (
ID ==
".amdhsa_uses_dynamic_stack") {
6254 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6256 }
else if (
ID ==
".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6259 "directive is not supported with architected flat scratch",
6262 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6264 }
else if (
ID ==
".amdhsa_enable_private_segment") {
6268 "directive is not supported without architected flat scratch",
6271 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6273 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_x") {
6275 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6277 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_y") {
6279 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6281 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_id_z") {
6283 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6285 }
else if (
ID ==
".amdhsa_system_sgpr_workgroup_info") {
6287 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6289 }
else if (
ID ==
".amdhsa_system_vgpr_workitem_id") {
6291 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6293 }
else if (
ID ==
".amdhsa_next_free_vgpr") {
6294 VGPRRange = ValRange;
6295 NextFreeVGPR = ExprVal;
6296 }
else if (
ID ==
".amdhsa_next_free_sgpr") {
6297 SGPRRange = ValRange;
6298 NextFreeSGPR = ExprVal;
6299 }
else if (
ID ==
".amdhsa_accum_offset") {
6301 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6302 AccumOffset = ExprVal;
6303 }
else if (
ID ==
".amdhsa_named_barrier_count") {
6305 return Error(IDRange.
Start,
"directive requires gfx1250+", IDRange);
6306 NamedBarCnt = ExprVal;
6307 }
else if (
ID ==
".amdhsa_reserve_vcc") {
6309 return OutOfRangeError(ValRange);
6310 ReserveVCC = ExprVal;
6311 }
else if (
ID ==
".amdhsa_reserve_flat_scratch") {
6312 if (IVersion.
Major < 7)
6313 return Error(IDRange.
Start,
"directive requires gfx7+", IDRange);
6316 "directive is not supported with architected flat scratch",
6319 return OutOfRangeError(ValRange);
6320 ReserveFlatScr = ExprVal;
6321 }
else if (
ID ==
".amdhsa_reserve_xnack_mask") {
6322 if (IVersion.
Major < 8)
6323 return Error(IDRange.
Start,
"directive requires gfx8+", IDRange);
6325 return OutOfRangeError(ValRange);
6326 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6327 return getParser().Error(IDRange.
Start,
".amdhsa_reserve_xnack_mask does not match target id",
6329 }
else if (
ID ==
".amdhsa_float_round_mode_32") {
6331 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6333 }
else if (
ID ==
".amdhsa_float_round_mode_16_64") {
6335 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6337 }
else if (
ID ==
".amdhsa_float_denorm_mode_32") {
6339 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6341 }
else if (
ID ==
".amdhsa_float_denorm_mode_16_64") {
6343 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6345 }
else if (
ID ==
".amdhsa_dx10_clamp") {
6346 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6347 return Error(IDRange.
Start,
"directive unsupported on gfx1170+",
6350 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6352 }
else if (
ID ==
".amdhsa_ieee_mode") {
6353 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6354 return Error(IDRange.
Start,
"directive unsupported on gfx1170+",
6357 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6359 }
else if (
ID ==
".amdhsa_fp16_overflow") {
6360 if (IVersion.
Major < 9)
6361 return Error(IDRange.
Start,
"directive requires gfx9+", IDRange);
6363 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6365 }
else if (
ID ==
".amdhsa_tg_split") {
6367 return Error(IDRange.
Start,
"directive requires gfx90a+", IDRange);
6370 }
else if (
ID ==
".amdhsa_workgroup_processor_mode") {
6373 "directive unsupported on " + getSTI().
getCPU(), IDRange);
6375 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6377 }
else if (
ID ==
".amdhsa_memory_ordered") {
6378 if (IVersion.
Major < 10)
6379 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6381 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6383 }
else if (
ID ==
".amdhsa_forward_progress") {
6384 if (IVersion.
Major < 10)
6385 return Error(IDRange.
Start,
"directive requires gfx10+", IDRange);
6387 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6389 }
else if (
ID ==
".amdhsa_shared_vgpr_count") {
6391 if (IVersion.
Major < 10 || IVersion.
Major >= 12)
6392 return Error(IDRange.
Start,
"directive requires gfx10 or gfx11",
6394 SharedVGPRCount = Val;
6396 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6398 }
else if (
ID ==
".amdhsa_inst_pref_size") {
6399 if (IVersion.
Major < 11)
6400 return Error(IDRange.
Start,
"directive requires gfx11+", IDRange);
6401 if (IVersion.
Major == 11) {
6403 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6407 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6410 }
else if (
ID ==
".amdhsa_exception_fp_ieee_invalid_op") {
6413 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6415 }
else if (
ID ==
".amdhsa_exception_fp_denorm_src") {
6417 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6419 }
else if (
ID ==
".amdhsa_exception_fp_ieee_div_zero") {
6422 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6424 }
else if (
ID ==
".amdhsa_exception_fp_ieee_overflow") {
6426 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6428 }
else if (
ID ==
".amdhsa_exception_fp_ieee_underflow") {
6430 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6432 }
else if (
ID ==
".amdhsa_exception_fp_ieee_inexact") {
6434 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6436 }
else if (
ID ==
".amdhsa_exception_int_div_zero") {
6438 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6440 }
else if (
ID ==
".amdhsa_round_robin_scheduling") {
6441 if (IVersion.
Major < 12)
6442 return Error(IDRange.
Start,
"directive requires gfx12+", IDRange);
6444 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6447 return Error(IDRange.
Start,
"unknown .amdhsa_kernel directive", IDRange);
6450#undef PARSE_BITS_ENTRY
// Both next-free register directives are mandatory.
6453 if (!Seen.
contains(
".amdhsa_next_free_vgpr"))
6454 return TokError(
".amdhsa_next_free_vgpr directive is required");
6456 if (!Seen.
contains(
".amdhsa_next_free_sgpr"))
6457 return TokError(
".amdhsa_next_free_sgpr directive is required");
// An explicit user SGPR count takes precedence over the implied total.
6459 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6461 return TokError(
"too many user SGPRs enabled, found " +
6462 Twine(UserSGPRCount) +
", but only " +
6468 if (PreloadLength) {
// Derive the granulated VGPR/SGPR block counts and range-check them.
6474 const MCExpr *VGPRBlocks;
6475 const MCExpr *SGPRBlocks;
6476 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6477 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6478 EnableWavefrontSize32, NextFreeVGPR,
6479 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6483 int64_t EvaluatedVGPRBlocks;
6484 bool VGPRBlocksEvaluatable =
6485 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6486 if (VGPRBlocksEvaluatable &&
6488 static_cast<uint64_t
>(EvaluatedVGPRBlocks))) {
6489 return OutOfRangeError(VGPRRange);
6493 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6494 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT,
getContext());
6496 int64_t EvaluatedSGPRBlocks;
6497 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6499 static_cast<uint64_t
>(EvaluatedSGPRBlocks)))
6500 return OutOfRangeError(SGPRRange);
6503 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6504 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
getContext());
// NOTE(review): the message below says "amdgpu_user_sgpr_count" while the
// directive parsed above is ".amdhsa_user_sgpr_count" - confirm the wording.
6506 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6507 return TokError(
"amdgpu_user_sgpr_count smaller than implied by "
6508 "enabled user SGPRs");
6514 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6515 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
getContext());
6520 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6521 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
getContext());
6526 return TokError(
"Kernarg size should be resolvable");
6527 uint64_t kernarg_size = IVal;
// PreloadLength and PreloadOffset are each scaled by 4 before being compared
// against kernarg_size.
6528 if (PreloadLength && kernarg_size &&
6529 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6530 return TokError(
"Kernarg preload length + offset is larger than the "
6531 "kernarg segment size");
6534 if (!Seen.
contains(
".amdhsa_accum_offset"))
6535 return TokError(
".amdhsa_accum_offset directive is required");
// NOTE(review): UEvaluatedAccum is copied from EvaluatedAccum before
// AccumEvaluatable is tested. It is only used under that test below - confirm
// evaluateAsAbsolute always writes its out-parameter.
6536 int64_t EvaluatedAccum;
6537 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6538 uint64_t UEvaluatedAccum = EvaluatedAccum;
6539 if (AccumEvaluatable &&
6540 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6541 return TokError(
"accum_offset should be in range [4..256] in "
6544 int64_t EvaluatedNumVGPR;
6545 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6548 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6549 return TokError(
"accum_offset exceeds total VGPR allocation");
6555 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6556 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6562 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6563 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
// shared_vgpr_count constraints are checked for major versions 10 and 11.
6566 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
6568 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6569 return TokError(
"shared_vgpr_count directive not valid on "
6570 "wavefront size 32");
6573 if (VGPRBlocksEvaluatable &&
6574 (SharedVGPRCount * 2 +
static_cast<uint64_t
>(EvaluatedVGPRBlocks) >
6576 return TokError(
"shared_vgpr_count*2 + "
6577 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
// Hand the completed descriptor to the target streamer.
6582 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6583 NextFreeVGPR, NextFreeSGPR,
6584 ReserveVCC, ReserveFlatScr);
// Parses the code-object version as an absolute expression and forwards it
// to the target streamer via EmitDirectiveAMDHSACodeObjectVersion.
6588bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6590 if (ParseAsAbsoluteExpression(
Version))
6593 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(
Version);
// Parses one `name = value` entry of an amd_kernel_code_t block into C.
//
// ID: field name already read by the caller.
// C:  kernel code structure being filled in.
//
// "max_scratch_backing_memory_byte_size" is special-cased: the rest of the
// statement is consumed. Other fields go through C.ParseKernelCodeT, whose
// error text is forwarded via TokError. After parsing, the wavefront-size
// fields are cross-checked against GFX10+ and the WavefrontSize32/64 features.
6597bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef
ID,
6598 AMDGPUMCKernelCodeT &
C) {
6601 if (
ID ==
"max_scratch_backing_memory_byte_size") {
6602 Parser.eatToEndOfStatement();
// Collect the field parser's error message so it can be reported below.
6606 SmallString<40> ErrStr;
6607 raw_svector_ostream Err(ErrStr);
6608 if (!
C.ParseKernelCodeT(
ID, getParser(), Err)) {
6609 return TokError(Err.
str());
6613 if (
ID ==
"enable_wavefront_size32") {
6616 return TokError(
"enable_wavefront_size32=1 is only allowed on GFX10+");
6618 return TokError(
"enable_wavefront_size32=1 requires +WavefrontSize32");
6621 return TokError(
"enable_wavefront_size32=0 requires +WavefrontSize64");
// The messages below tie wavefront_size 5 to +WavefrontSize32 and 6 to
// +WavefrontSize64.
6625 if (
ID ==
"wavefront_size") {
6626 if (
C.wavefront_size == 5) {
6628 return TokError(
"wavefront_size=5 is only allowed on GFX10+");
6630 return TokError(
"wavefront_size=5 requires +WavefrontSize32");
6631 }
else if (
C.wavefront_size == 6) {
6633 return TokError(
"wavefront_size=6 requires +WavefrontSize64");
// Parses an amd_kernel_code_t block.
//
// It reads value identifiers until `.end_amd_kernel_code_t`, delegating each
// one to ParseAMDKernelCodeTValue, then emits the resulting structure via the
// target streamer.
6640bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6641 AMDGPUMCKernelCodeT KernelCode;
6650 if (!parseId(
ID,
"expected value identifier or .end_amd_kernel_code_t"))
6653 if (
ID ==
".end_amd_kernel_code_t")
6656 if (ParseAMDKernelCodeTValue(
ID, KernelCode))
6661 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
// Reads a symbol name ("expected symbol name" on failure) and passes it to
// the target streamer's EmitAMDGPUSymbolType.
6666bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6667 StringRef KernelName;
6668 if (!parseId(KernelName,
"expected symbol name"))
6671 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
// Handles the .amd_amdgpu_isa directive.
//
// It rejects non-amdgcn targets, requires the string token to equal the
// target streamer's target id ("target id must match options"), and then
// emits the ISA version.
6678bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6679 if (!getSTI().getTargetTriple().isAMDGCN()) {
6680 return Error(getLoc(),
6681 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6685 auto TargetIDDirective = getLexer().getTok().getStringContents();
6686 if (getTargetStreamer().getTargetID()->
toString() != TargetIDDirective)
6687 return Error(getParser().getTok().getLoc(),
"target id must match options");
6689 getTargetStreamer().EmitISAVersion();
// Passes the collected HSA metadata string to the target streamer's
// EmitHSAMetadataV3 and reports "invalid HSA metadata" when that fails.
// How HSAMetadataString is filled is on lines not shown here.
6695bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6698 std::string HSAMetadataString;
6703 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6704 return Error(getLoc(),
"invalid HSA metadata");
// Collects raw source text into CollectString until AssemblerDirectiveEnd is
// seen.
//
// AssemblerDirectiveBegin: name of the opening directive.
// AssemblerDirectiveEnd:   directive that terminates collection.
// CollectString:           receives the collected text, with each statement
//                          followed by the separator string from AsmInfo.
//
// The lexer's skip-space mode is turned off while collecting and turned back
// on afterwards. A missing end directive is reported as
// "expected directive <end> not found".
6711bool AMDGPUAsmParser::ParseToEndDirective(
const char *AssemblerDirectiveBegin,
6712 const char *AssemblerDirectiveEnd,
6713 std::string &CollectString) {
6715 raw_string_ostream CollectStream(CollectString);
6717 getLexer().setSkipSpace(
false);
6719 bool FoundEnd =
false;
6722 CollectStream << getTokenStr();
6726 if (trySkipId(AssemblerDirectiveEnd)) {
6731 CollectStream << Parser.parseStringToEndOfStatement()
6732 <<
getContext().getAsmInfo().getSeparatorString();
6734 Parser.eatToEndOfStatement();
6737 getLexer().setSkipSpace(
true);
6740 return TokError(Twine(
"expected directive ") +
6741 Twine(AssemblerDirectiveEnd) + Twine(
" not found"));
// Feeds a collected string to the streamer's PAL metadata object via
// setFromString; a false result is reported as "invalid PAL metadata".
// NOTE(review): the code that produces `String` is not visible here.
6748bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6754 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6755 if (!PALMetadata->setFromString(
String))
6756 return Error(getLoc(),
"invalid PAL metadata");
// Parses the legacy form of PAL metadata: a list of absolute expressions taken
// as Key/Value pairs and stored with PALMetadata->setRegister(Key, Value).
// The metadata object is switched to legacy mode first (setLegacy()).
// Diagnostics visible here: the directive is refused on non-amdpal OSes, each
// entry must be a valid absolute expression, and the number of values must be
// even.
6761bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6763 return Error(getLoc(),
6765 "not available on non-amdpal OSes")).str());
6768 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6769 PALMetadata->setLegacy();
6772 if (ParseAsAbsoluteExpression(
Key)) {
6773 return TokError(Twine(
"invalid value in ") +
6777 return TokError(Twine(
"expected an even number of values in ") +
6780 if (ParseAsAbsoluteExpression(
Value)) {
6781 return TokError(Twine(
"invalid value in ") +
6784 PALMetadata->setRegister(
Key,
Value);
// Handles the .amdgpu_lds directive: `<name>, <size>[, <alignment>]`.
// Visible checks, in order:
//   - a valid section must be current;
//   - an identifier and a comma must follow;
//   - size is an absolute expression, rejected when negative or when it
//     exceeds LocalMemorySize;
//   - alignment defaults to 4; when parsed it must be a power of two and
//     below 2^31;
//   - the symbol must still be undefined after redefineIfPossible().
// On success the symbol, size and alignment go to emitAMDGPULDS.
6793bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6794 if (getParser().checkForValidSection())
6798 SMLoc NameLoc = getLoc();
6799 if (getParser().parseIdentifier(Name))
6800 return TokError(
"expected identifier in directive");
6803 if (getParser().parseComma())
6809 SMLoc SizeLoc = getLoc();
6810 if (getParser().parseAbsoluteExpression(
Size))
6813 return Error(SizeLoc,
"size must be non-negative");
6814 if (
Size > LocalMemorySize)
6815 return Error(SizeLoc,
"size is too large");
6817 int64_t Alignment = 4;
6819 SMLoc AlignLoc = getLoc();
6820 if (getParser().parseAbsoluteExpression(Alignment))
6823 return Error(AlignLoc,
"alignment must be a power of two");
6828 if (Alignment >= 1u << 31)
6829 return Error(AlignLoc,
"alignment is too large");
6835 Symbol->redefineIfPossible();
6836 if (!
Symbol->isUndefined())
6837 return Error(NameLoc,
"invalid symbol redefinition");
6839 getTargetStreamer().emitAMDGPULDS(Symbol,
Size,
Align(Alignment));
// Handles a `.amdgpu_info <func>` ... `.end_amdgpu_info` block.
//
// Each nested directive is matched by name (Dir):
//   flags, num_sgpr, num_vgpr, num_agpr, private_segment_size
//       -> absolute expression stored into the per-function record FI; each
//          of these also sets HasScalarAttrs.
//   use, call
//       -> identifier; a (FuncSym, symbol) pair is appended to Uses / Calls.
//   indirect_call, typeid
//       -> escaped string; a (FuncSym, string) pair is appended to
//          IndirectCalls / TypeIds.
// Anything else is diagnosed as an unknown .amdgpu_info directive.
//
// Results are first gathered in the local ParsedInfoData and only afterwards
// appended to the parser-wide InfoData (created on demand with emplace()).
// NOTE(review): the condition guarding the push of FI and the derivation of
// Dir from ID are on lines not visible in this listing.
6843bool AMDGPUAsmParser::ParseDirectiveAMDGPUInfo() {
6844 if (getParser().checkForValidSection())
6848 if (getParser().parseIdentifier(FuncName))
6849 return TokError(
"expected symbol name after .amdgpu_info");
6852 AMDGPU::InfoSectionData ParsedInfoData;
6853 AMDGPU::FuncInfo FI;
6855 bool HasScalarAttrs =
false;
6862 SMLoc IDLoc = getLoc();
6863 if (!parseId(
ID,
"expected directive or .end_amdgpu_info"))
6866 if (
ID ==
".end_amdgpu_info")
6874 return Error(IDLoc,
"unknown .amdgpu_info directive '" +
ID +
"'");
6876 if (Dir ==
"flags") {
6878 if (getParser().parseAbsoluteExpression(Val))
6881 FI.
UsesVCC = !!(
Flags & AMDGPU::FuncInfoFlags::FUNC_USES_VCC);
6883 !!(
Flags & AMDGPU::FuncInfoFlags::FUNC_USES_FLAT_SCRATCH);
6885 HasScalarAttrs =
true;
6886 }
else if (Dir ==
"num_sgpr") {
6888 if (getParser().parseAbsoluteExpression(Val))
6890 FI.
NumSGPR =
static_cast<uint32_t
>(Val);
6891 HasScalarAttrs =
true;
6892 }
else if (Dir ==
"num_vgpr") {
6894 if (getParser().parseAbsoluteExpression(Val))
6897 HasScalarAttrs =
true;
6898 }
else if (Dir ==
"num_agpr") {
6900 if (getParser().parseAbsoluteExpression(Val))
6903 HasScalarAttrs =
true;
6904 }
else if (Dir ==
"private_segment_size") {
6906 if (getParser().parseAbsoluteExpression(Val))
6909 HasScalarAttrs =
true;
6910 }
else if (Dir ==
"use") {
6912 if (getParser().parseIdentifier(ResName))
6913 return TokError(
"expected resource symbol for .amdgpu_use");
6914 ParsedInfoData.
Uses.push_back(
6915 {FuncSym,
getContext().getOrCreateSymbol(ResName)});
6916 }
else if (Dir ==
"call") {
6918 if (getParser().parseIdentifier(DstName))
6919 return TokError(
"expected callee symbol for .amdgpu_call");
6920 ParsedInfoData.
Calls.push_back(
6921 {FuncSym,
getContext().getOrCreateSymbol(DstName)});
6922 }
else if (Dir ==
"indirect_call") {
6924 if (getParser().parseEscapedString(TypeId))
6925 return TokError(
"expected type ID string for .amdgpu_indirect_call");
6926 ParsedInfoData.
IndirectCalls.push_back({FuncSym, std::move(TypeId)});
6927 }
else if (Dir ==
"typeid") {
6929 if (getParser().parseEscapedString(TypeId))
6930 return TokError(
"expected type ID string for .amdgpu_typeid");
6931 ParsedInfoData.
TypeIds.push_back({FuncSym, std::move(TypeId)});
6933 return Error(IDLoc,
"unknown .amdgpu_info directive '" +
ID +
"'");
6938 ParsedInfoData.
Funcs.push_back(std::move(FI));
// Merge the locally parsed records into the parser-wide InfoData.
6940 AMDGPU::InfoSectionData &
Data = InfoData ? *InfoData : InfoData.emplace();
6941 for (AMDGPU::FuncInfo &Func : ParsedInfoData.
Funcs)
6942 Data.Funcs.push_back(std::move(Func));
6943 for (std::pair<MCSymbol *, MCSymbol *> &Use : ParsedInfoData.
Uses)
6944 Data.Uses.push_back(Use);
6945 for (std::pair<MCSymbol *, MCSymbol *> &
Call : ParsedInfoData.
Calls)
6947 for (std::pair<MCSymbol *, std::string> &
IndirectCall :
6950 for (std::pair<MCSymbol *, std::string> &TypeId : ParsedInfoData.
TypeIds)
6951 Data.TypeIds.push_back(std::move(TypeId));
// End-of-file hook: hands the accumulated InfoData to the target streamer's
// emitAMDGPUInfo.
// NOTE(review): InfoData is dereferenced here; any guard for the empty case
// would be on a line not visible in this listing — confirm.
6956void AMDGPUAsmParser::onEndOfFile() {
6958 getTargetStreamer().emitAMDGPUInfo(*InfoData);
// Top-level directive dispatcher: compares the directive's spelling (IDVal)
// against the AMDGPU-specific directive names and forwards to the matching
// ParseDirective* handler.
// Names visible here: .amdhsa_kernel, .amdhsa_code_object_version,
// .amd_kernel_code_t, .amdgpu_hsa_kernel, .amd_amdgpu_isa, .amdgcn_target,
// .amdgpu_lds, .amdgpu_info. The HSA-metadata and PAL-metadata handlers are
// also called, but their guarding conditions are not visible.
// A diagnostic ending in "not available on non-amdhsa OSes" is built for one
// group of directives; the condition selecting it is not visible.
6961bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6962 StringRef IDVal = DirectiveID.
getString();
6965 if (IDVal ==
".amdhsa_kernel")
6966 return ParseDirectiveAMDHSAKernel();
6968 if (IDVal ==
".amdhsa_code_object_version")
6969 return ParseDirectiveAMDHSACodeObjectVersion();
6973 return ParseDirectiveHSAMetadata();
6975 if (IDVal ==
".amd_kernel_code_t")
6976 return ParseDirectiveAMDKernelCodeT();
6978 if (IDVal ==
".amdgpu_hsa_kernel")
6979 return ParseDirectiveAMDGPUHsaKernel();
6981 if (IDVal ==
".amd_amdgpu_isa")
6982 return ParseDirectiveISAVersion();
6986 Twine(
" directive is "
6987 "not available on non-amdhsa OSes"))
6992 if (IDVal ==
".amdgcn_target")
6993 return ParseDirectiveAMDGCNTarget();
6995 if (IDVal ==
".amdgpu_lds")
6996 return ParseDirectiveAMDGPULDS();
6998 if (IDVal ==
".amdgpu_info")
6999 return ParseDirectiveAMDGPUInfo();
7002 return ParseDirectivePALMetadataBegin();
7005 return ParseDirectivePALMetadata();
// Reports whether a register is available on the current subtarget.
// Visible decisions: some registers depend on hasSGPR104_SGPR105() or
// hasSGPR102_SGPR103(); SRC_FLAT_SCRATCH_BASE_LO/HI depend on
// hasGloballyAddressableScratch(); one group requires VI or GFX9 together
// with XNACK support in the target ID.
// NOTE(review): the results for the SRC_SHARED_* / SRC_PRIVATE_* and
// SRC_POPS_EXITING_WAVE_ID cases are on lines not visible in this listing.
7010bool AMDGPUAsmParser::subtargetHasRegister(
const MCRegisterInfo &MRI,
7017 return hasSGPR104_SGPR105();
7020 case SRC_SHARED_BASE_LO:
7021 case SRC_SHARED_BASE:
7022 case SRC_SHARED_LIMIT_LO:
7023 case SRC_SHARED_LIMIT:
7024 case SRC_PRIVATE_BASE_LO:
7025 case SRC_PRIVATE_BASE:
7026 case SRC_PRIVATE_LIMIT_LO:
7027 case SRC_PRIVATE_LIMIT:
7029 case SRC_FLAT_SCRATCH_BASE_LO:
7030 case SRC_FLAT_SCRATCH_BASE_HI:
7031 return hasGloballyAddressableScratch();
7032 case SRC_POPS_EXITING_WAVE_ID:
7044 return (
isVI() ||
isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
7074 return hasSGPR102_SGPR103();
// Parses one instruction operand.
// Order of attempts visible here: parseVOPD, then the generated
// MatchOperandParserImpl for the mnemonic, then a square-bracketed register
// list, and finally parseRegOrImm.
// In the bracketed form each element is parsed with parseReg ("expected a
// register" on failure); when more than one operand was added since Prefix,
// "[" and "]" tokens located at the bracket positions are added as well.
// NOTE(review): where the "[" token is inserted is not fully visible.
7079ParseStatus AMDGPUAsmParser::parseOperand(
OperandVector &Operands,
7082 ParseStatus Res = parseVOPD(Operands);
7087 Res = MatchOperandParserImpl(Operands, Mnemonic);
7099 SMLoc LBraceLoc = getLoc();
7104 auto Loc = getLoc();
7105 Res = parseReg(Operands);
7107 Error(Loc,
"expected a register");
7111 RBraceLoc = getLoc();
7116 "expected a comma or a closing square bracket"))
7120 if (Operands.
size() - Prefix > 1) {
7122 AMDGPUOperand::CreateToken(
this,
"[", LBraceLoc));
7123 Operands.
push_back(AMDGPUOperand::CreateToken(
this,
"]", RBraceLoc));
7129 return parseRegOrImm(Operands);
// Strips a forced-encoding suffix from a mnemonic and records the request.
// All forced settings are reset first (encoding size 0, DPP off, SDWA off).
// Suffixes are tested in this order: _e64_dpp, _e64, _e32, _dpp, _sdwa.
// Visible effects: _e64_dpp and _e64 force a 64-bit encoding, _e32 forces a
// 32-bit encoding, _sdwa forces SDWA.
// NOTE(review): the DPP flag updates and the return statements are on lines
// not visible in this listing.
7132StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
7134 setForcedEncodingSize(0);
7135 setForcedDPP(
false);
7136 setForcedSDWA(
false);
7138 if (
Name.consume_back(
"_e64_dpp")) {
7140 setForcedEncodingSize(64);
7143 if (
Name.consume_back(
"_e64")) {
7144 setForcedEncodingSize(64);
7147 if (
Name.consume_back(
"_e32")) {
7148 setForcedEncodingSize(32);
7151 if (
Name.consume_back(
"_dpp")) {
7155 if (
Name.consume_back(
"_sdwa")) {
7156 setForcedSDWA(
true);
7164 unsigned VariantID);
7170 Name = parseMnemonicSuffix(Name);
7176 Operands.
push_back(AMDGPUOperand::CreateToken(
this, Name, NameLoc));
7178 bool IsMIMG = Name.starts_with(
"image_");
7181 OperandMode
Mode = OperandMode_Default;
7183 Mode = OperandMode_NSA;
7187 checkUnsupportedInstruction(Name, NameLoc);
7188 if (!Parser.hasPendingError()) {
7191 :
"not a valid operand.";
7192 Error(getLoc(), Msg);
// If the current token is the identifier Name, consumes it and appends a
// token operand with that text located at S.
// NOTE(review): the status returned on a mismatch is not visible here.
7211ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7214 if (!trySkipId(Name))
7217 Operands.
push_back(AMDGPUOperand::CreateToken(
this, Name, S));
// Overload that parses an integer introduced by Prefix into an output value.
// NOTE(review): only the first signature line is visible in this listing; the
// description is inferred from how the other overload calls it — confirm.
7221ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
const char *Prefix,
// Parses `<Prefix><integer>` and appends it as an immediate operand of kind
// ImmTy.
//
// \param ConvertResult  optional callback that may rewrite the parsed value;
//                       when it is set and returns false the operand is
//                       rejected with "invalid <Prefix> value.".
7230ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7231 const char *Prefix,
OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7232 std::function<
bool(int64_t &)> ConvertResult) {
7236 ParseStatus Res = parseIntWithPrefix(Prefix,
Value);
7240 if (ConvertResult && !ConvertResult(
Value)) {
7241 Error(S,
"invalid " + StringRef(Prefix) +
" value.");
7244 Operands.
push_back(AMDGPUOperand::CreateImm(
this,
Value, S, ImmTy));
// Parses a prefixed, bracketed list of at most MaxSize (4) elements and
// appends the combined value Val as one immediate operand of kind ImmTy.
// Each element must be 0 or 1 ("invalid <Prefix> value." otherwise); reaching
// the element limit without a closing bracket yields "expected a closing
// square bracket".
// NOTE(review): how elements are packed into Val and how ConvertResult is
// applied are on lines not visible in this listing.
7248ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7249 const char *Prefix,
OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7250 bool (*ConvertResult)(int64_t &)) {
7259 const unsigned MaxSize = 4;
7263 for (
int I = 0; ; ++
I) {
7265 SMLoc Loc = getLoc();
7269 if (
Op != 0 &&
Op != 1)
7270 return Error(Loc,
"invalid " + StringRef(Prefix) +
" value.");
7277 if (
I + 1 == MaxSize)
7278 return Error(getLoc(),
"expected a closing square bracket");
7284 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Val, S, ImmTy));
// Parses a single-bit modifier written as `<Name>` or `no<Name>` and appends
// it as an immediate operand (value Bit, kind ImmTy).
// Visible target checks:
//   - "r128" and "a16" are rejected on GPUs that do not support them;
//   - "nogds" (Bit == 0 with Name "gds") is rejected under a condition on the
//     mnemonic that is not visible here;
//   - on GFX9 an ImmTyA16 operand is recorded as ImmTyR128A16.
// NOTE(review): the role of IgnoreNegative is not visible in this listing.
7288ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7290 AMDGPUOperand::ImmTy ImmTy,
7291 bool IgnoreNegative) {
7295 if (trySkipId(Name)) {
7297 }
else if (trySkipId(
"no", Name)) {
7306 return Error(S,
"r128 modifier is not supported on this GPU");
7307 if (Name ==
"a16" && !
hasA16())
7308 return Error(S,
"a16 modifier is not supported on this GPU");
7310 if (Bit == 0 && Name ==
"gds") {
7311 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
7313 return Error(S,
"nogds is not allowed");
7316 if (
isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7317 ImmTy = AMDGPUOperand::ImmTyR128A16;
7319 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Bit, S, ImmTy));
// Maps a cache-policy modifier spelling to a value via StringSwitch.
// A leading "no" is stripped from Id and reported through Disabling.
// NOTE(review): two separate StringSwitch tables are used; the condition that
// selects between them and their entries are not visible in this listing.
7323unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7324 bool &Disabling)
const {
7325 Disabling =
Id.consume_front(
"no");
7328 return StringSwitch<unsigned>(Id)
7335 return StringSwitch<unsigned>(Id)
// Parses cache-policy modifiers and appends one ImmTyCPol immediate.
// Two paths are visible:
//   1. A path that combines parseTH, parseScope and the optional
//      nv / nonv and scale_offset / noscale_offset keywords into CPolVal.
//   2. A path that loops over modifier names resolved with getCPolKind,
//      tracking the Enabled and Seen bit sets; it diagnoses unsupported dlc
//      and scc modifiers and duplicate modifiers.
// NOTE(review): the condition choosing between the two paths is on a line not
// visible in this listing.
7343ParseStatus AMDGPUAsmParser::parseCPol(
OperandVector &Operands) {
7345 SMLoc StringLoc = getLoc();
7347 int64_t CPolVal = 0;
7356 ResTH = parseTH(Operands, TH);
7367 ResScope = parseScope(Operands, Scope);
7380 if (trySkipId(
"nv")) {
7384 }
else if (trySkipId(
"no",
"nv")) {
7391 if (trySkipId(
"scale_offset")) {
7395 }
else if (trySkipId(
"no",
"scale_offset")) {
7408 Operands.
push_back(AMDGPUOperand::CreateImm(
this, CPolVal, StringLoc,
7409 AMDGPUOperand::ImmTyCPol));
7413 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).
getToken();
7414 SMLoc OpLoc = getLoc();
7415 unsigned Enabled = 0, Seen = 0;
7419 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7426 return Error(S,
"dlc modifier is not supported on this GPU");
7429 return Error(S,
"scc modifier is not supported on this GPU");
7432 return Error(S,
"duplicate cache policy modifier");
7444 AMDGPUOperand::CreateImm(
this,
Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
// Parses a `scope` modifier through parseStringOrIntWithPrefix, accepting the
// symbolic names SCOPE_CU, SCOPE_SE, SCOPE_DEV and SCOPE_SYS.
// NOTE(review): the output parameter and the trailing call arguments are on
// lines not visible in this listing.
7448ParseStatus AMDGPUAsmParser::parseScope(
OperandVector &Operands,
7453 ParseStatus Res = parseStringOrIntWithPrefix(
7454 Operands,
"scope", {
"SCOPE_CU",
"SCOPE_SE",
"SCOPE_DEV",
"SCOPE_SYS"},
// Parses a `th:<name>` modifier into the TH bit field.
// Visible rules:
//   - TH_DEFAULT is accepted as is;
//   - TH_STORE_LU, TH_LOAD_WB and TH_LOAD_NT_WB are rejected up front;
//   - otherwise the name must start with TH_ATOMIC_, TH_LOAD_ or TH_STORE_;
//     the remainder is matched by a StringSwitch (BYPASS is checked
//     separately first).
// Both StringSwitch tables default to 0xffffffff, and TH == 0xffffffff is
// then diagnosed as "invalid th value".
// NOTE(review): the switch entries and the type bits set per prefix are on
// lines not visible in this listing.
7463ParseStatus AMDGPUAsmParser::parseTH(
OperandVector &Operands, int64_t &TH) {
7468 ParseStatus Res = parseStringWithPrefix(
"th",
Value, StringLoc);
7472 if (
Value ==
"TH_DEFAULT")
7474 else if (
Value ==
"TH_STORE_LU" ||
Value ==
"TH_LOAD_WB" ||
7475 Value ==
"TH_LOAD_NT_WB") {
7476 return Error(StringLoc,
"invalid th value");
7477 }
else if (
Value.consume_front(
"TH_ATOMIC_")) {
7479 }
else if (
Value.consume_front(
"TH_LOAD_")) {
7481 }
else if (
Value.consume_front(
"TH_STORE_")) {
7484 return Error(StringLoc,
"invalid th value");
7487 if (
Value ==
"BYPASS")
7492 TH |= StringSwitch<int64_t>(
Value)
7502 .Default(0xffffffff);
7504 TH |= StringSwitch<int64_t>(
Value)
7515 .Default(0xffffffff);
7518 if (TH == 0xffffffff)
7519 return Error(StringLoc,
"invalid th value");
7526 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7527 AMDGPUOperand::ImmTy ImmT, int64_t
Default = 0,
7528 std::optional<unsigned> InsertAt = std::nullopt) {
7529 auto i = OptionalIdx.find(ImmT);
7530 if (i != OptionalIdx.end()) {
7531 unsigned Idx = i->second;
7532 const AMDGPUOperand &
Op =
7533 static_cast<const AMDGPUOperand &
>(*Operands[Idx]);
7537 Op.addImmOperands(Inst, 1);
7539 if (InsertAt.has_value())
// Parses a string value introduced by Prefix and records the location of the
// value in StringLoc.
// NOTE(review): the remaining parameters and the parsing steps are on lines
// not visible in this listing.
7546ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7552 StringLoc = getLoc();
// Parses `<Name>:<value>` where the value is either one of the symbolic
// spellings in Ids or an integer.
// A symbolic value is compared against Ids[IntVal]; the resulting IntVal must
// lie in [0, Ids.size()), otherwise "invalid <Name> value" is reported at the
// value's location.
// NOTE(review): the output parameter and the search loop are on lines not
// visible in this listing.
7557ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7558 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7563 SMLoc StringLoc = getLoc();
7567 Value = getTokenStr();
7571 if (
Value == Ids[IntVal])
7576 if (IntVal < 0 || IntVal >= (int64_t)Ids.
size())
7577 return Error(StringLoc,
"invalid " + Twine(Name) +
" value");
// Convenience overload: runs the value-returning overload of
// parseStringOrIntWithPrefix and appends the resulting IntVal as an immediate
// operand of kind Type located at S.
7582ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7583 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7584 AMDGPUOperand::ImmTy
Type) {
7588 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7590 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S,
Type));
// Tries to parse `<Pref>:<int>` with parseIntWithPrefix and range-checks the
// result: values outside [0, MaxVal] are diagnosed as "out of range <Pref>".
// NOTE(review): the remaining parameters and the return values are on lines
// not visible in this listing.
7599bool AMDGPUAsmParser::tryParseFmt(
const char *Pref,
7603 SMLoc Loc = getLoc();
7605 auto Res = parseIntWithPrefix(Pref, Val);
7611 if (Val < 0 || Val > MaxVal) {
7612 Error(Loc, Twine(
"out of range ", StringRef(Pref)));
// Parses an `index_key:<int>` modifier and appends it as an immediate operand
// of kind ImmTy.
// Accepted ranges depend on ImmTy: 0..1 for the 16-bit and 32-bit index keys,
// 0..3 for the 8-bit index key; anything else is "out of range index_key".
7620ParseStatus AMDGPUAsmParser::tryParseIndexKey(
OperandVector &Operands,
7621 AMDGPUOperand::ImmTy ImmTy) {
7622 const char *Pref =
"index_key";
7624 SMLoc Loc = getLoc();
7625 auto Res = parseIntWithPrefix(Pref, ImmVal);
7629 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7630 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7631 (ImmVal < 0 || ImmVal > 1))
7632 return Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7634 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7635 return Error(Loc, Twine(
"out of range ", StringRef(Pref)));
7637 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, ImmTy));
// Parses index_key for the 8-bit variant (delegates to tryParseIndexKey).
7641ParseStatus AMDGPUAsmParser::parseIndexKey8bit(
OperandVector &Operands) {
7642 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
// Parses index_key for the 16-bit variant (delegates to tryParseIndexKey).
7645ParseStatus AMDGPUAsmParser::parseIndexKey16bit(
OperandVector &Operands) {
7646 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
// Parses index_key for the 32-bit variant (delegates to tryParseIndexKey).
7649ParseStatus AMDGPUAsmParser::parseIndexKey32bit(
OperandVector &Operands) {
7650 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
// Shared helper behind parseMatrixAFMT / parseMatrixBFMT; takes the operand
// list, a modifier name and the immediate kind to create.
// NOTE(review): the body is on lines not visible in this listing.
7653ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(
OperandVector &Operands,
7655 AMDGPUOperand::ImmTy
Type) {
// Parses the "matrix_a_fmt" modifier as an ImmTyMatrixAFMT operand.
7660ParseStatus AMDGPUAsmParser::parseMatrixAFMT(
OperandVector &Operands) {
7661 return tryParseMatrixFMT(Operands,
"matrix_a_fmt",
7662 AMDGPUOperand::ImmTyMatrixAFMT);
// Parses the "matrix_b_fmt" modifier as an ImmTyMatrixBFMT operand.
7665ParseStatus AMDGPUAsmParser::parseMatrixBFMT(
OperandVector &Operands) {
7666 return tryParseMatrixFMT(Operands,
"matrix_b_fmt",
7667 AMDGPUOperand::ImmTyMatrixBFMT);
// Shared helper behind parseMatrixAScale / parseMatrixBScale; takes the
// operand list, a modifier name and the immediate kind to create.
// NOTE(review): the body is on lines not visible in this listing.
7670ParseStatus AMDGPUAsmParser::tryParseMatrixScale(
OperandVector &Operands,
7672 AMDGPUOperand::ImmTy
Type) {
// Parses the "matrix_a_scale" modifier as an ImmTyMatrixAScale operand.
7677ParseStatus AMDGPUAsmParser::parseMatrixAScale(
OperandVector &Operands) {
7678 return tryParseMatrixScale(Operands,
"matrix_a_scale",
7679 AMDGPUOperand::ImmTyMatrixAScale);
// Parses the "matrix_b_scale" modifier as an ImmTyMatrixBScale operand.
7682ParseStatus AMDGPUAsmParser::parseMatrixBScale(
OperandVector &Operands) {
7683 return tryParseMatrixScale(Operands,
"matrix_b_scale",
7684 AMDGPUOperand::ImmTyMatrixBScale);
// Shared helper behind parseMatrixAScaleFmt / parseMatrixBScaleFmt; takes the
// operand list, a modifier name and the immediate kind to create.
// NOTE(review): the body is on lines not visible in this listing.
7687ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(
OperandVector &Operands,
7689 AMDGPUOperand::ImmTy
Type) {
// Parses the "matrix_a_scale_fmt" modifier as an ImmTyMatrixAScaleFmt operand.
7694ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(
OperandVector &Operands) {
7695 return tryParseMatrixScaleFmt(Operands,
"matrix_a_scale_fmt",
7696 AMDGPUOperand::ImmTyMatrixAScaleFmt);
// Parses the "matrix_b_scale_fmt" modifier as an ImmTyMatrixBScaleFmt operand.
7699ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(
OperandVector &Operands) {
7700 return tryParseMatrixScaleFmt(Operands,
"matrix_b_scale_fmt",
7701 AMDGPUOperand::ImmTyMatrixBScaleFmt);
// Parses the split numeric form `dfmt:<n>` / `nfmt:<n>` in either order.
// The loop runs twice so that each of the two fields gets one chance to be
// parsed; a field is only attempted while it is still *_UNDEF.
// A field left undefined is replaced by its default (DFMT_DEFAULT /
// NFMT_DEFAULT) when the other one was given.
// NOTE(review): the return statements and the encoding into Format are on
// lines not visible in this listing.
7706ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &
Format) {
7707 using namespace llvm::AMDGPU::MTBUFFormat;
7713 for (
int I = 0;
I < 2; ++
I) {
7714 if (Dfmt == DFMT_UNDEF && !tryParseFmt(
"dfmt", DFMT_MAX, Dfmt))
7717 if (Nfmt == NFMT_UNDEF && !tryParseFmt(
"nfmt", NFMT_MAX, Nfmt))
7722 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7728 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7731 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7732 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
// Parses the unified numeric form `format:<n>` with an upper bound of
// UFMT_MAX; a result of UFMT_UNDEF is handled separately.
// NOTE(review): the return values are on lines not visible in this listing.
7738ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &
Format) {
7739 using namespace llvm::AMDGPU::MTBUFFormat;
7743 if (!tryParseFmt(
"format", UFMT_MAX, Fmt))
7746 if (Fmt == UFMT_UNDEF)
// Tries to interpret FormatStr as a data format and then as a numeric format;
// when neither lookup succeeds, reports "unsupported format" at Loc.
// NOTE(review): the lookups and the assignments to Dfmt / Nfmt are on lines
// not visible in this listing.
7753bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7755 StringRef FormatStr,
7757 using namespace llvm::AMDGPU::MTBUFFormat;
7761 if (
Format != DFMT_UNDEF) {
7767 if (
Format != NFMT_UNDEF) {
7772 Error(Loc,
"unsupported format");
// Parses a symbolic format given as one or two names (data format and/or
// numeric format).
// The first name is FormatStr; an optional second name is read with parseId.
// After the second name, a field that is still undefined means the same kind
// was given twice, which is diagnosed as a duplicate. Missing fields then
// take their defaults. A resulting Ufmt of UFMT_UNDEF is reported as
// "unsupported format".
// NOTE(review): the conversion from Dfmt/Nfmt to Ufmt and the final encoding
// are on lines not visible in this listing.
7776ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7779 using namespace llvm::AMDGPU::MTBUFFormat;
7783 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7788 SMLoc Loc = getLoc();
7789 if (!parseId(Str,
"expected a format string") ||
7790 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7792 if (Dfmt == DFMT_UNDEF)
7793 return Error(Loc,
"duplicate numeric format");
7794 if (Nfmt == NFMT_UNDEF)
7795 return Error(Loc,
"duplicate data format");
7798 Dfmt = (Dfmt ==
DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7799 Nfmt = (Nfmt ==
NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7803 if (Ufmt == UFMT_UNDEF)
7804 return Error(FormatLoc,
"unsupported format");
// Looks FormatStr up as a unified format name; an Id of UFMT_UNDEF is handled
// separately, and a known name on a GPU without unified formats is reported
// as "unified format is not supported on this GPU".
// NOTE(review): the lookup call and return values are on lines not visible in
// this listing.
7813ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7816 using namespace llvm::AMDGPU::MTBUFFormat;
7819 if (Id == UFMT_UNDEF)
7823 return Error(Loc,
"unified format is not supported on this GPU");
// Parses a plain numeric format value into Format and reports "out of range
// format" when it fails the range check.
// NOTE(review): the parsing call and the range test are on lines not visible
// in this listing.
7829ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &
Format) {
7830 using namespace llvm::AMDGPU::MTBUFFormat;
7831 SMLoc Loc = getLoc();
7836 return Error(Loc,
"out of range format");
// Parses a format written either symbolically or numerically.
// The symbolic path reads a name with parseId and tries it first as a unified
// format and then as a split format; the other path falls back to
// parseNumericFormat.
// NOTE(review): the condition separating the two paths is on a line not
// visible in this listing.
7841ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &
Format) {
7842 using namespace llvm::AMDGPU::MTBUFFormat;
7848 StringRef FormatStr;
7849 SMLoc Loc = getLoc();
7850 if (!parseId(FormatStr,
"expected a format string"))
7853 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc,
Format);
7855 Res = parseSymbolicSplitFormat(FormatStr, Loc,
Format);
7865 return parseNumericFormat(
Format);
// Parses the format operand together with the operand that follows it.
// An ImmTyFORMAT immediate is appended first, then the next operand is parsed
// with parseRegOrImm. A format may also be written after that operand; in
// that case the previously appended ImmTyFORMAT operand (at index Size - 2)
// is looked up and asserted to be the format immediate. Giving the format in
// both places is diagnosed as "duplicate format".
// NOTE(review): how the late format value is written back is on lines not
// visible in this listing.
7868ParseStatus AMDGPUAsmParser::parseFORMAT(
OperandVector &Operands) {
7869 using namespace llvm::AMDGPU::MTBUFFormat;
7873 SMLoc Loc = getLoc();
7883 AMDGPUOperand::CreateImm(
this,
Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7895 Res = parseRegOrImm(Operands);
7902 Res = parseSymbolicOrNumericFormat(
Format);
7907 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands[
Size - 2]);
7908 assert(
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7915 return Error(getLoc(),
"duplicate format");
// Parses a flat offset modifier: first as `offset` (ImmTyOffset), then as
// `inst_offset` (ImmTyInstOffset).
// NOTE(review): the condition for trying the second spelling is on a line not
// visible in this listing.
7919ParseStatus AMDGPUAsmParser::parseFlatOffset(
OperandVector &Operands) {
7921 parseIntWithPrefix(
"offset", Operands, AMDGPUOperand::ImmTyOffset);
7923 Res = parseIntWithPrefix(
"inst_offset", Operands,
7924 AMDGPUOperand::ImmTyInstOffset);
// Parses the shared r128/a16 bit: first as `r128` (ImmTyR128A16), then as
// `a16` (ImmTyA16).
// NOTE(review): the condition for trying the second spelling is on a line not
// visible in this listing.
7929ParseStatus AMDGPUAsmParser::parseR128A16(
OperandVector &Operands) {
7931 parseNamedBit(
"r128", Operands, AMDGPUOperand::ImmTyR128A16);
7933 Res = parseNamedBit(
"a16", Operands, AMDGPUOperand::ImmTyA16);
// Parses the BLGP operand. Two spellings produce the same ImmTyBLGP
// immediate: `blgp:<int>` and a `neg:[...]` array.
// NOTE(review): the condition choosing between the two spellings is on a line
// not visible in this listing.
7937ParseStatus AMDGPUAsmParser::parseBLGP(
OperandVector &Operands) {
7939 parseIntWithPrefix(
"blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7942 parseOperandArrayWithPrefix(
"neg", Operands, AMDGPUOperand::ImmTyBLGP);
// Converts the parsed operands of an export instruction into MCInst operands.
// Walking Operands[1..]:
//   - source operands are added and their MCInst index is remembered in
//     OperandIdx[SrcIdx] (up to 4 sources);
//   - the ImmTyExpTgt immediate is added directly;
//   - the "done" and "row_en" tokens are matched explicitly;
//   - remaining immediates are recorded in OptionalIdx by kind.
// Afterwards an enable mask is built over the sources: two bits per source
// when Compr is set, one bit otherwise.
// NOTE(review): the condition that selects which sources contribute to EnMask
// and the handling of the compressed form are on lines not visible here.
7951void AMDGPUAsmParser::cvtExp(MCInst &Inst,
const OperandVector &Operands) {
7952 OptionalImmIndexMap OptionalIdx;
7954 unsigned OperandIdx[4];
7955 unsigned EnMask = 0;
7958 for (
unsigned i = 1, e = Operands.
size(); i != e; ++i) {
7959 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
7964 OperandIdx[SrcIdx] = Inst.
size();
7965 Op.addRegOperands(Inst, 1);
7972 OperandIdx[SrcIdx] = Inst.
size();
7978 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7979 Op.addImmOperands(Inst, 1);
7983 if (
Op.isToken() && (
Op.getToken() ==
"done" ||
Op.getToken() ==
"row_en"))
7987 OptionalIdx[
Op.getImmTy()] = i;
7993 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
8000 for (
auto i = 0; i < SrcIdx; ++i) {
8002 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
8027 IntVal =
encode(ISA, IntVal, CntVal);
8028 if (CntVal !=
decode(ISA, IntVal)) {
8030 IntVal =
encode(ISA, IntVal, -1);
// Parses one `<counter>(<value>)` term of an s_waitcnt operand and merges it
// into IntVal.
// Recognised counter names: vmcnt, expcnt, lgkmcnt, each with an optional
// "_sat" suffix. Diagnostics visible here: "invalid counter name <name>",
// "too large value for <name>" and "expected a counter name".
// NOTE(review): the encoding calls and the saturation handling are on lines
// not visible in this listing.
8038bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
8040 SMLoc CntLoc = getLoc();
8041 StringRef CntName = getTokenStr();
8048 SMLoc ValLoc = getLoc();
8057 if (CntName ==
"vmcnt" || CntName ==
"vmcnt_sat") {
8059 }
else if (CntName ==
"expcnt" || CntName ==
"expcnt_sat") {
8061 }
else if (CntName ==
"lgkmcnt" || CntName ==
"lgkmcnt_sat") {
8064 Error(CntLoc,
"invalid counter name " + CntName);
8069 Error(ValLoc,
"too large value for " + CntName);
8078 Error(getLoc(),
"expected a counter name");
// Parses an s_waitcnt operand: counter terms are read with parseCnt into
// Waitcnt, and the result is appended as a plain immediate operand at S.
// NOTE(review): the initial value of Waitcnt and the alternative expression
// form are on lines not visible in this listing.
8086ParseStatus AMDGPUAsmParser::parseSWaitCnt(
OperandVector &Operands) {
8093 if (!parseCnt(Waitcnt))
8101 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Waitcnt, S));
// Parses one `<field>(<value>)` term of an s_delay_alu operand and ORs the
// encoded value into Delay at the field's bit position (Shift).
// Field names: instid0, instskip, instid1; any other name is diagnosed as
// "invalid field name <name>".
// Symbolic values visible here map as follows: VALU_DEP_1..4 -> 1..4,
// TRANS32_DEP_1..3 -> 5..7, FMA_ACCUM_CYCLE_1 -> 8, SALU_CYCLE_1..3 -> 9..11.
// NOTE(review): the Shift chosen per field and the value set accepted for
// instskip are on lines not visible in this listing.
8105bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
8106 SMLoc FieldLoc = getLoc();
8107 StringRef FieldName = getTokenStr();
8112 SMLoc ValueLoc = getLoc();
8119 if (FieldName ==
"instid0") {
8121 }
else if (FieldName ==
"instskip") {
8123 }
else if (FieldName ==
"instid1") {
8126 Error(FieldLoc,
"invalid field name " + FieldName);
8145 .Case(
"VALU_DEP_1", 1)
8146 .Case(
"VALU_DEP_2", 2)
8147 .Case(
"VALU_DEP_3", 3)
8148 .Case(
"VALU_DEP_4", 4)
8149 .Case(
"TRANS32_DEP_1", 5)
8150 .Case(
"TRANS32_DEP_2", 6)
8151 .Case(
"TRANS32_DEP_3", 7)
8152 .Case(
"FMA_ACCUM_CYCLE_1", 8)
8153 .Case(
"SALU_CYCLE_1", 9)
8154 .Case(
"SALU_CYCLE_2", 10)
8155 .Case(
"SALU_CYCLE_3", 11)
8163 Delay |=
Value << Shift;
// Parses an s_delay_alu operand: field terms are read with parseDelay into
// Delay, and the result is appended as a plain immediate operand at S.
// NOTE(review): the loop and the alternative expression form are on lines not
// visible in this listing.
8167ParseStatus AMDGPUAsmParser::parseSDelayALU(
OperandVector &Operands) {
8173 if (!parseDelay(Delay))
8181 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Delay, S));
// Operand predicate for s_waitcnt operands.
// NOTE(review): the return type and body are on lines not visible in this
// listing.
8186AMDGPUOperand::isSWaitCnt()
const {
// Operand predicate for s_delay_alu operands: any immediate qualifies.
8190bool AMDGPUOperand::isSDelayALU()
const {
return isImm(); }
// Emits the diagnostic that corresponds to a depctr encoding failure.
// Messages visible here: invalid counter name, counter not supported on this
// GPU, duplicate counter name, and invalid value for the counter; each one
// includes DepCtrName.
// NOTE(review): the mapping from ErrorId to message is on lines not visible
// in this listing.
8196void AMDGPUAsmParser::depCtrError(SMLoc Loc,
int ErrorId,
8197 StringRef DepCtrName) {
8200 Error(Loc, Twine(
"invalid counter name ", DepCtrName));
8203 Error(Loc, Twine(DepCtrName,
" is not supported on this GPU"));
8206 Error(Loc, Twine(
"duplicate counter name ", DepCtrName));
8209 Error(Loc, Twine(
"invalid value for ", DepCtrName));
// Parses one `<counter>(<value>)` term of a depctr operand.
// encodeDepCtr produces the encoded field (CntVal) and updates UsedOprMask;
// failures are reported through depCtrError. The bits that became used in
// this call (PrevOprMask ^ UsedOprMask) are cleared in DepCtr and replaced by
// CntVal.
8216bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr,
unsigned &UsedOprMask) {
8218 using namespace llvm::AMDGPU::DepCtr;
8220 SMLoc DepCtrLoc = getLoc();
8221 StringRef DepCtrName = getTokenStr();
8231 unsigned PrevOprMask = UsedOprMask;
8232 int CntVal =
encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8235 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8244 Error(getLoc(),
"expected a counter name");
8249 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8250 DepCtr = (DepCtr & ~CntValMask) | CntVal;
// Parses a complete depctr operand: counter terms are accumulated with the
// two-argument parseDepCtr (UsedOprMask starts at 0) and the result is
// appended as a plain immediate operand at Loc.
// NOTE(review): the initial DepCtr value and the alternative expression form
// are on lines not visible in this listing.
8254ParseStatus AMDGPUAsmParser::parseDepCtr(
OperandVector &Operands) {
8255 using namespace llvm::AMDGPU::DepCtr;
8258 SMLoc Loc = getLoc();
8261 unsigned UsedOprMask = 0;
8263 if (!parseDepCtr(DepCtr, UsedOprMask))
8271 Operands.
push_back(AMDGPUOperand::CreateImm(
this, DepCtr, Loc));
// Operand predicate for depctr operands: same test as isS16Imm().
8275bool AMDGPUOperand::isDepCtr()
const {
return isS16Imm(); }
// Parses the hwreg macro form into three operand-info records (register id,
// offset, width).
// The register is either symbolic (IsSymbolic is set) or an expression parsed
// with parseExpr("a register name"); the optional fields follow a comma and
// their locations are recorded.
// NOTE(review): the middle parameter and the parsing of offset/width values
// are on lines not visible in this listing.
8281ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8283 OperandInfoTy &Width) {
8284 using namespace llvm::AMDGPU::Hwreg;
8290 HwReg.Loc = getLoc();
8293 HwReg.IsSymbolic =
true;
8295 }
else if (!
parseExpr(HwReg.Val,
"a register name")) {
8303 if (!skipToken(
AsmToken::Comma,
"expected a comma or a closing parenthesis"))
8313 Width.Loc = getLoc();
// Parses a hwreg operand and appends it as an ImmTyHwreg immediate.
// Accepted forms, tried in this order:
//   1. structured fields id / offset / size (parseStructuredOpFields);
//   2. the hwreg macro form (parseHwregFunc);
//   3. a plain expression.
// For forms 1 and 2 the fields are validated and packed with
// HwregEncoding::encode. The local Width type overrides validate() to emit
// "only values from 1 to 32 are legal". Out-of-range immediates are rejected
// with "invalid immediate: only 16-bit values are legal".
// NOTE(review): the exact conditions linking the three forms are on lines not
// visible in this listing.
8321ParseStatus AMDGPUAsmParser::parseHwreg(
OperandVector &Operands) {
8322 using namespace llvm::AMDGPU::Hwreg;
8325 SMLoc Loc = getLoc();
8327 StructuredOpField HwReg(
"id",
"hardware register", HwregId::Width,
8329 StructuredOpField
Offset(
"offset",
"bit offset", HwregOffset::Width,
8330 HwregOffset::Default);
8331 struct : StructuredOpField {
8332 using StructuredOpField::StructuredOpField;
8333 bool validate(AMDGPUAsmParser &Parser)
const override {
8335 return Error(Parser,
"only values from 1 to 32 are legal");
8338 } Width(
"size",
"bitfield width", HwregSize::Width, HwregSize::Default);
8339 ParseStatus Res = parseStructuredOpFields({&HwReg, &
Offset, &Width});
8342 Res = parseHwregFunc(HwReg,
Offset, Width);
8345 if (!validateStructuredOpFields({&HwReg, &
Offset, &Width}))
8347 ImmVal = HwregEncoding::encode(HwReg.Val,
Offset.Val, Width.Val);
8351 parseExpr(ImmVal,
"a hwreg macro, structured immediate"))
8358 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8360 AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
// Operand predicate: true for immediates of kind ImmTyHwreg.
8364bool AMDGPUOperand::isHwreg()
const {
8365 return isImmTy(ImmTyHwreg);
// Parses the arguments of a sendmsg macro into Msg, Op and Stream.
// The message is either symbolic (IsSymbolic is set) or an expression parsed
// with parseExpr("a message name"). The operation, when present, is marked
// defined and resolved either by name through getMsgOpId or as an expression
// ("an operation name"). The stream, when present, is marked defined and its
// location recorded.
// NOTE(review): the return type, the middle parameter and the separators are
// on lines not visible in this listing.
8373AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8375 OperandInfoTy &Stream) {
8376 using namespace llvm::AMDGPU::SendMsg;
8381 Msg.IsSymbolic =
true;
8383 }
else if (!
parseExpr(Msg.Val,
"a message name")) {
8388 Op.IsDefined =
true;
8391 (
Op.Val =
getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8394 }
else if (!
parseExpr(
Op.Val,
"an operation name")) {
8399 Stream.IsDefined =
true;
8400 Stream.Loc = getLoc();
// Validates a parsed sendmsg triple and emits the matching diagnostic.
// Strict is taken from Msg.IsSymbolic.
// Diagnostics visible here cover: unsupported or invalid message id, an
// operation given for a message without operations, a missing operation,
// unsupported or invalid operation id, a stream given for an operation
// without streams, and an invalid stream id.
// NOTE(review): the return type and the predicates guarding each diagnostic
// are on lines not visible in this listing.
8410AMDGPUAsmParser::validateSendMsg(
const OperandInfoTy &Msg,
8411 const OperandInfoTy &
Op,
8412 const OperandInfoTy &Stream) {
8413 using namespace llvm::AMDGPU::SendMsg;
8418 bool Strict = Msg.IsSymbolic;
8422 Error(Msg.Loc,
"specified message id is not supported on this GPU");
8427 Error(Msg.Loc,
"invalid message id");
8433 Error(
Op.Loc,
"message does not support operations");
8435 Error(Msg.Loc,
"missing message operation");
8441 Error(
Op.Loc,
"specified operation id is not supported on this GPU");
8443 Error(
Op.Loc,
"invalid operation id");
8448 Error(Stream.Loc,
"message operation does not support streams");
8452 Error(Stream.Loc,
"invalid message stream id");
// Parses a sendmsg operand and appends it as an ImmTySendMsg immediate.
// Either the macro form is parsed (parseSendMsgBody) and validated
// (validateSendMsg), or a plain expression is read; an out-of-range immediate
// is rejected with "invalid immediate: only 16-bit values are legal".
// NOTE(review): the test for the macro form and the encoding of
// Msg/Op/Stream into ImmVal are on lines not visible in this listing.
8458ParseStatus AMDGPUAsmParser::parseSendMsg(
OperandVector &Operands) {
8459 using namespace llvm::AMDGPU::SendMsg;
8462 SMLoc Loc = getLoc();
8466 OperandInfoTy
Op(OP_NONE_);
8467 OperandInfoTy Stream(STREAM_ID_NONE_);
8468 if (parseSendMsgBody(Msg,
Op, Stream) &&
8469 validateSendMsg(Msg,
Op, Stream)) {
8474 }
else if (
parseExpr(ImmVal,
"a sendmsg macro")) {
8476 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8481 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
// Operand predicate: true for immediates of kind ImmTySendMsg.
8485bool AMDGPUOperand::isSendMsg()
const {
8486 return isImmTy(ImmTySendMsg);
// Parses a wait-event operand and appends it as an ImmTyWaitEvent immediate.
// The structured form has a single one-bit field whose name depends on the
// target: "dont_wait_export_ready" on GFX11, "export_ready" otherwise. When
// the structured form is used the immediate is that field's value.
// Out-of-range immediates are rejected with "invalid immediate: only 16-bit
// values are legal".
// NOTE(review): the fallback to a plain expression is on lines not visible in
// this listing.
8489ParseStatus AMDGPUAsmParser::parseWaitEvent(
OperandVector &Operands) {
8490 using namespace llvm::AMDGPU::WaitEvent;
8492 SMLoc Loc = getLoc();
8495 StructuredOpField DontWaitExportReady(
"dont_wait_export_ready",
"bit value",
8497 StructuredOpField ExportReady(
"export_ready",
"bit value", 1, 0);
8499 StructuredOpField *TargetBitfield =
8500 isGFX11() ? &DontWaitExportReady : &ExportReady;
8502 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8506 if (!validateStructuredOpFields({TargetBitfield}))
8508 ImmVal = TargetBitfield->Val;
8515 return Error(Loc,
"invalid immediate: only 16-bit values are legal");
8517 Operands.
push_back(AMDGPUOperand::CreateImm(
this, ImmVal, Loc,
8518 AMDGPUOperand::ImmTyWaitEvent));
// Operand predicate: true for immediates of kind ImmTyWaitEvent.
8522bool AMDGPUOperand::isWaitEvent()
const {
return isImmTy(ImmTyWaitEvent); }
// Parses an interpolation slot name, maps it to an integer with a
// StringSwitch, and appends it as an ImmTyInterpSlot immediate; an unknown
// name is reported as "invalid interpolation slot".
// NOTE(review): the accepted names are on lines not visible in this listing.
8528ParseStatus AMDGPUAsmParser::parseInterpSlot(
OperandVector &Operands) {
8535 int Slot = StringSwitch<int>(Str)
8542 return Error(S,
"invalid interpolation slot");
8544 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Slot, S,
8545 AMDGPUOperand::ImmTyInterpSlot));
// Parses an interpolation attribute of the form `attr<N><chan>`.
// The string must start with "attr"; its last two characters select the
// channel through a StringSwitch, and what remains after dropping the 4-char
// prefix and 2-char suffix is parsed as a decimal attribute number.
// Two immediates are appended: the attribute number (ImmTyInterpAttr, at S)
// and the channel (ImmTyInterpAttrChan, at SChan).
// NOTE(review): the channel spellings and the upper bound on the attribute
// number are on lines not visible in this listing.
8549ParseStatus AMDGPUAsmParser::parseInterpAttr(
OperandVector &Operands) {
8556 if (!Str.starts_with(
"attr"))
8557 return Error(S,
"invalid interpolation attribute");
8559 StringRef Chan = Str.take_back(2);
8560 int AttrChan = StringSwitch<int>(Chan)
8567 return Error(S,
"invalid or missing interpolation attribute channel");
8569 Str = Str.drop_back(2).drop_front(4);
8572 if (Str.getAsInteger(10, Attr))
8573 return Error(S,
"invalid or missing interpolation attribute number");
8576 return Error(S,
"out of bounds interpolation attribute number");
8580 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Attr, S,
8581 AMDGPUOperand::ImmTyInterpAttr));
8582 Operands.
push_back(AMDGPUOperand::CreateImm(
8583 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
// Parses an export target name and appends its id as an ImmTyExpTgt
// immediate. A failed lookup is reported as "invalid exp target" when the id
// is ET_INVALID, and as "exp target is not supported on this GPU" otherwise.
// NOTE(review): the name lookup is on lines not visible in this listing.
8591ParseStatus AMDGPUAsmParser::parseExpTgt(
OperandVector &Operands) {
8592 using namespace llvm::AMDGPU::Exp;
8602 return Error(S, (Id == ET_INVALID)
8603 ?
"invalid exp target"
8604 :
"exp target is not supported on this GPU");
8606 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Id, S,
8607 AMDGPUOperand::ImmTyExpTgt));
// Tests whether the given token is the identifier Id.
// NOTE(review): the return type and body are on lines not visible in this
// listing; the description follows the signature only.
8616AMDGPUAsmParser::isId(
const AsmToken &Token,
const StringRef Id)
const {
// Single-argument overload of isId taking only the identifier Id.
// NOTE(review): the return type and body are on lines not visible in this
// listing; presumably it tests the current token — confirm.
8621AMDGPUAsmParser::isId(
const StringRef Id)
const {
8627 return getTokenKind() ==
Kind;
// Returns an identifier as a StringRef.
// NOTE(review): the body is on lines not visible in this listing.
8630StringRef AMDGPUAsmParser::getId()
const {
// Conditionally consumes the identifier Id; callers in this file treat a true
// result as "matched and skipped".
// NOTE(review): the return type and body are on lines not visible in this
// listing.
8635AMDGPUAsmParser::trySkipId(
const StringRef Id) {
// Overload taking a prefix and an identifier; it inspects the text of the
// current token (Tok).
// NOTE(review): the matching logic is on lines not visible in this listing;
// callers use it for spellings such as "no" + name.
8644AMDGPUAsmParser::trySkipId(
const StringRef Pref,
const StringRef Id) {
8646 StringRef Tok = getTokenStr();
8657 if (isId(Id) && peekToken().is(Kind)) {
8667 if (isToken(Kind)) {
8676 const StringRef ErrMsg) {
8677 if (!trySkipToken(Kind)) {
8678 Error(getLoc(), ErrMsg);
// Parses an expression and requires it to evaluate to an absolute value,
// which is stored in Imm.
// When evaluation fails the diagnostic is "expected absolute expression" if
// Expected is empty, and "expected <Expected> or an absolute expression"
// otherwise.
// NOTE(review): the return type and return values are on lines not visible in
// this listing.
8685AMDGPUAsmParser::parseExpr(int64_t &
Imm, StringRef Expected) {
8689 if (Parser.parseExpression(Expr))
8692 if (Expr->evaluateAsAbsolute(
Imm))
8695 if (Expected.empty()) {
8696 Error(S,
"expected absolute expression");
8698 Error(S, Twine(
"expected ", Expected) +
8699 Twine(
" or an absolute expression"));
8709 if (Parser.parseExpression(Expr))
8713 if (Expr->evaluateAsAbsolute(IntVal)) {
8714 Operands.
push_back(AMDGPUOperand::CreateImm(
this, IntVal, S));
8716 Operands.
push_back(AMDGPUOperand::CreateExpr(
this, Expr, S));
// Reads the contents of a string token into Val; on failure reports ErrMsg at
// the current location.
// NOTE(review): the token-kind test and return values are on lines not
// visible in this listing.
8722AMDGPUAsmParser::parseString(StringRef &Val,
const StringRef ErrMsg) {
8724 Val =
getToken().getStringContents();
8728 Error(getLoc(), ErrMsg);
// Reads the text of the current token into Val; on failure ErrMsg is reported
// at the current location, but only when ErrMsg is non-empty.
// NOTE(review): the token-kind test and return values are on lines not
// visible in this listing.
8733AMDGPUAsmParser::parseId(StringRef &Val,
const StringRef ErrMsg) {
8735 Val = getTokenStr();
8739 if (!ErrMsg.
empty())
8740 Error(getLoc(), ErrMsg);
// Returns the parser's current token (Parser.getTok()).
8745AMDGPUAsmParser::getToken()
const {
8746 return Parser.getTok();
// Returns a lookahead token; one branch of a conditional expression calls the
// lexer's peekTok(ShouldSkipSpace).
// NOTE(review): the condition and the other branch are on a line not visible
// in this listing.
8749AsmToken AMDGPUAsmParser::peekToken(
bool ShouldSkipSpace) {
8752 : getLexer().peekTok(ShouldSkipSpace);
8757 auto TokCount = getLexer().peekTokens(Tokens);
8759 for (
auto Idx = TokCount; Idx < Tokens.
size(); ++Idx)
// Returns the kind of the lexer's current token (getLexer().getKind()).
8764AMDGPUAsmParser::getTokenKind()
const {
8765 return getLexer().getKind();
// Returns a source location; used throughout this file to mark where the
// current token starts before it is consumed.
// NOTE(review): the return type and body are on lines not visible in this
// listing.
8769AMDGPUAsmParser::getLoc()
const {
// Returns the text of a token; callers in this file use it for the current
// token's spelling.
// NOTE(review): the return type and body are on lines not visible in this
// listing.
8774AMDGPUAsmParser::getTokenStr()
const {
// Lexer-advance helper.
// NOTE(review): the return type and body are on lines not visible in this
// listing; presumably forwards to the underlying parser's Lex() — confirm.
8779AMDGPUAsmParser::lex() {
// Scans Operands for the parsed operand whose recorded MCInst operand index
// (getMCOpIdx()) equals MCOpIdx.
// NOTE(review): the return statements, including what happens when nothing
// matches, are on lines not visible in this listing.
8783const AMDGPUOperand &
8784AMDGPUAsmParser::findMCOperand(
const OperandVector &Operands,
8785 int MCOpIdx)
const {
8786 for (
const auto &
Op : Operands) {
8787 const AMDGPUOperand &TargetOp =
static_cast<AMDGPUOperand &
>(*Op);
8788 if (TargetOp.getMCOpIdx() == MCOpIdx)
// Returns the start location of Operands[0], i.e. the first parsed operand of
// the instruction (the mnemonic token in this parser).
8794SMLoc AMDGPUAsmParser::getInstLoc(
const OperandVector &Operands)
const {
8795 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
// Selects one of two source locations.
// NOTE(review): the body is on lines not visible in this listing; by its name
// it presumably returns the later of the two — confirm.
8799SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
// Returns the start location of the parsed operand that corresponds to MCInst
// operand index MCOpIdx (looked up with findMCOperand).
8803SMLoc AMDGPUAsmParser::getOperandLoc(
const OperandVector &Operands,
8804 int MCOpIdx)
const {
8805 return findMCOperand(Operands, MCOpIdx).getStartLoc();
// Walks the operands from the last one down to index 1 and returns the start
// location of a matching operand; if none matches, the instruction location
// (getInstLoc) is returned. Operands[0] is never examined by the loop.
// NOTE(review): the call to Test inside the loop is on a line not visible in
// this listing.
8809AMDGPUAsmParser::getOperandLoc(std::function<
bool(
const AMDGPUOperand&)>
Test,
8811 for (
unsigned i = Operands.
size() - 1; i > 0; --i) {
8812 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
8814 return Op.getStartLoc();
8816 return getInstLoc(Operands);
8820AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy
Type,
8822 auto Test = [=](
const AMDGPUOperand&
Op) {
return Op.isImmTy(
Type); };
8823 return getOperandLoc(
Test, Operands);
8837 StringRef
Id = getTokenStr();
8838 SMLoc IdLoc = getLoc();
8844 find_if(Fields, [Id](StructuredOpField *
F) {
return F->Id ==
Id; });
8845 if (
I == Fields.
end())
8846 return Error(IdLoc,
"unknown field");
8847 if ((*I)->IsDefined)
8848 return Error(IdLoc,
"duplicate field");
8851 (*I)->Loc = getLoc();
8854 (*I)->IsDefined =
true;
8861bool AMDGPUAsmParser::validateStructuredOpFields(
8863 return all_of(Fields, [
this](
const StructuredOpField *
F) {
8864 return F->validate(*
this);
8875 const unsigned OrMask,
8876 const unsigned XorMask) {
8885bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &
Op,
const unsigned MinVal,
8886 const unsigned MaxVal,
8887 const Twine &ErrMsg, SMLoc &Loc) {
8904AMDGPUAsmParser::parseSwizzleOperands(
const unsigned OpNum, int64_t*
Op,
8905 const unsigned MinVal,
8906 const unsigned MaxVal,
8907 const StringRef ErrMsg) {
8909 for (
unsigned i = 0; i < OpNum; ++i) {
8910 if (!parseSwizzleOperand(
Op[i], MinVal, MaxVal, ErrMsg, Loc))
8918AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &
Imm) {
8919 using namespace llvm::AMDGPU::Swizzle;
8922 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8923 "expected a 2-bit lane id")) {
8934AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &
Imm) {
8935 using namespace llvm::AMDGPU::Swizzle;
8941 if (!parseSwizzleOperand(GroupSize,
8943 "group size must be in the interval [2,32]",
8948 Error(Loc,
"group size must be a power of two");
8951 if (parseSwizzleOperand(LaneIdx,
8953 "lane id must be in the interval [0,group size - 1]",
8962AMDGPUAsmParser::parseSwizzleReverse(int64_t &
Imm) {
8963 using namespace llvm::AMDGPU::Swizzle;
8968 if (!parseSwizzleOperand(GroupSize,
8970 "group size must be in the interval [2,32]",
8975 Error(Loc,
"group size must be a power of two");
8984AMDGPUAsmParser::parseSwizzleSwap(int64_t &
Imm) {
8985 using namespace llvm::AMDGPU::Swizzle;
8990 if (!parseSwizzleOperand(GroupSize,
8992 "group size must be in the interval [1,16]",
8997 Error(Loc,
"group size must be a power of two");
9006AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &
Imm) {
9007 using namespace llvm::AMDGPU::Swizzle;
9014 SMLoc StrLoc = getLoc();
9015 if (!parseString(Ctl)) {
9018 if (Ctl.
size() != BITMASK_WIDTH) {
9019 Error(StrLoc,
"expected a 5-character mask");
9023 unsigned AndMask = 0;
9024 unsigned OrMask = 0;
9025 unsigned XorMask = 0;
9027 for (
size_t i = 0; i < Ctl.
size(); ++i) {
9031 Error(StrLoc,
"invalid mask");
9052bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &
Imm) {
9053 using namespace llvm::AMDGPU::Swizzle;
9056 Error(getLoc(),
"FFT mode swizzle not supported on this GPU");
9062 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
9063 "FFT swizzle must be in the interval [0," +
9064 Twine(FFT_SWIZZLE_MAX) + Twine(
']'),
9072bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &
Imm) {
9073 using namespace llvm::AMDGPU::Swizzle;
9076 Error(getLoc(),
"Rotate mode swizzle not supported on this GPU");
9083 if (!parseSwizzleOperand(
Direction, 0, 1,
9084 "direction must be 0 (left) or 1 (right)", Loc))
9088 if (!parseSwizzleOperand(
9089 RotateSize, 0, ROTATE_MAX_SIZE,
9090 "number of threads to rotate must be in the interval [0," +
9091 Twine(ROTATE_MAX_SIZE) + Twine(
']'),
9096 (RotateSize << ROTATE_SIZE_SHIFT);
9101AMDGPUAsmParser::parseSwizzleOffset(int64_t &
Imm) {
9103 SMLoc OffsetLoc = getLoc();
9109 Error(OffsetLoc,
"expected a 16-bit offset");
9116AMDGPUAsmParser::parseSwizzleMacro(int64_t &
Imm) {
9117 using namespace llvm::AMDGPU::Swizzle;
9121 SMLoc ModeLoc = getLoc();
9124 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
9125 Ok = parseSwizzleQuadPerm(
Imm);
9126 }
else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
9127 Ok = parseSwizzleBitmaskPerm(
Imm);
9128 }
else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
9129 Ok = parseSwizzleBroadcast(
Imm);
9130 }
else if (trySkipId(IdSymbolic[ID_SWAP])) {
9131 Ok = parseSwizzleSwap(
Imm);
9132 }
else if (trySkipId(IdSymbolic[ID_REVERSE])) {
9133 Ok = parseSwizzleReverse(
Imm);
9134 }
else if (trySkipId(IdSymbolic[ID_FFT])) {
9135 Ok = parseSwizzleFFT(
Imm);
9136 }
else if (trySkipId(IdSymbolic[ID_ROTATE])) {
9137 Ok = parseSwizzleRotate(
Imm);
9139 Error(ModeLoc,
"expected a swizzle mode");
9142 return Ok && skipToken(
AsmToken::RParen,
"expected a closing parentheses");
9148ParseStatus AMDGPUAsmParser::parseSwizzle(
OperandVector &Operands) {
9152 if (trySkipId(
"offset")) {
9156 if (trySkipId(
"swizzle")) {
9157 Ok = parseSwizzleMacro(
Imm);
9159 Ok = parseSwizzleOffset(
Imm);
9163 Operands.
push_back(AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTySwizzle));
9171AMDGPUOperand::isSwizzle()
const {
9172 return isImmTy(ImmTySwizzle);
9179int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
9181 using namespace llvm::AMDGPU::VGPRIndexMode;
9193 for (
unsigned ModeId = ID_MIN; ModeId <=
ID_MAX; ++ModeId) {
9194 if (trySkipId(IdSymbolic[ModeId])) {
9202 "expected a VGPR index mode or a closing parenthesis" :
9203 "expected a VGPR index mode");
9208 Error(S,
"duplicate VGPR index mode");
9216 "expected a comma or a closing parenthesis"))
9223ParseStatus AMDGPUAsmParser::parseGPRIdxMode(
OperandVector &Operands) {
9225 using namespace llvm::AMDGPU::VGPRIndexMode;
9231 Imm = parseGPRIdxMacro();
9235 if (getParser().parseAbsoluteExpression(
Imm))
9238 return Error(S,
"invalid immediate: only 4-bit values are legal");
9242 AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9246bool AMDGPUOperand::isGPRIdxMode()
const {
9247 return isImmTy(ImmTyGprIdxMode);
9254ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(
OperandVector &Operands) {
9259 if (isRegister() || isModifier())
9265 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.
size() - 1]);
9266 assert(Opr.isImm() || Opr.isExpr());
9267 SMLoc Loc = Opr.getStartLoc();
9271 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9272 Error(Loc,
"expected an absolute expression or a label");
9273 }
else if (Opr.isImm() && !Opr.isS16Imm()) {
9274 Error(Loc,
"expected a 16-bit signed jump offset");
9284ParseStatus AMDGPUAsmParser::parseBoolReg(
OperandVector &Operands) {
9285 return parseReg(Operands);
9292void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9295 OptionalImmIndexMap OptionalIdx;
9296 unsigned FirstOperandIdx = 1;
9297 bool IsAtomicReturn =
false;
9304 for (
unsigned i = FirstOperandIdx, e = Operands.
size(); i != e; ++i) {
9305 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
9309 Op.addRegOperands(Inst, 1);
9313 if (IsAtomicReturn && i == FirstOperandIdx)
9314 Op.addRegOperands(Inst, 1);
9319 if (
Op.isImm() &&
Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9320 Op.addImmOperands(Inst, 1);
9332 OptionalIdx[
Op.getImmTy()] = i;
9346bool AMDGPUOperand::isSMRDOffset8()
const {
9350bool AMDGPUOperand::isSMEMOffset()
const {
9352 return isImmLiteral();
9355bool AMDGPUOperand::isSMRDLiteralOffset()
const {
9390bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9391 if (BoundCtrl == 0 || BoundCtrl == 1) {
9399void AMDGPUAsmParser::onBeginOfFile() {
9400 if (!getParser().getStreamer().getTargetStreamer() ||
9404 if (!getTargetStreamer().getTargetID())
9405 getTargetStreamer().initializeTargetID(getSTI(),
9406 getSTI().getFeatureString());
9409 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9418bool AMDGPUAsmParser::parsePrimaryExpr(
const MCExpr *&Res, SMLoc &EndLoc) {
9422 StringRef TokenId = getTokenStr();
9423 AGVK VK = StringSwitch<AGVK>(TokenId)
9424 .Case(
"max", AGVK::AGVK_Max)
9425 .Case(
"min", AGVK::AGVK_Min)
9426 .Case(
"or", AGVK::AGVK_Or)
9427 .Case(
"extrasgprs", AGVK::AGVK_ExtraSGPRs)
9428 .Case(
"totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9429 .Case(
"alignto", AGVK::AGVK_AlignTo)
9430 .Case(
"occupancy", AGVK::AGVK_Occupancy)
9431 .Case(
"instprefsize", AGVK::AGVK_InstPrefSize)
9432 .Default(AGVK::AGVK_None);
9436 uint64_t CommaCount = 0;
9441 if (Exprs.
empty()) {
9443 "empty " + Twine(TokenId) +
" expression");
9446 if (CommaCount + 1 != Exprs.
size()) {
9448 "mismatch of commas in " + Twine(TokenId) +
" expression");
9452 Expected && Exprs.
size() != Expected) {
9453 Error(
getToken().getLoc(), Twine(TokenId) +
" expression expects " +
9454 Twine(Expected) +
" operands");
9461 if (getParser().parseExpression(Expr, EndLoc))
9465 if (LastTokenWasComma)
9469 "unexpected token in " + Twine(TokenId) +
" expression");
9475 return getParser().parsePrimaryExpr(Res, EndLoc,
nullptr);
9478ParseStatus AMDGPUAsmParser::parseOModSI(
OperandVector &Operands) {
9479 StringRef
Name = getTokenStr();
9480 if (Name ==
"mul") {
9481 return parseIntWithPrefix(
"mul", Operands,
9485 if (Name ==
"div") {
9486 return parseIntWithPrefix(
"div", Operands,
9497 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9502 const AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9503 AMDGPU::OpName::src2};
9511 int DstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
9516 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0_modifiers);
9518 if (
DstOp.isReg() &&
9523 if ((OpSel & (1 << SrcNum)) != 0)
9529void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9531 cvtVOP3P(Inst, Operands);
9535void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands,
9536 OptionalImmIndexMap &OptionalIdx) {
9537 cvtVOP3P(Inst, Operands, OptionalIdx);
9546 &&
Desc.NumOperands > (OpNum + 1)
9548 &&
Desc.operands()[OpNum + 1].RegClass != -1
9550 &&
Desc.getOperandConstraint(OpNum + 1,
9554void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst,
unsigned OpSel) {
9556 constexpr AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9557 AMDGPU::OpName::src2};
9558 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9559 AMDGPU::OpName::src1_modifiers,
9560 AMDGPU::OpName::src2_modifiers};
9561 for (
int J = 0; J < 3; ++J) {
9562 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc,
Ops[J]);
9568 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9571 if ((OpSel & (1 << J)) != 0)
9574 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9581void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst,
const OperandVector &Operands)
9583 OptionalImmIndexMap OptionalIdx;
9588 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9589 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9592 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9593 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9595 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9596 }
else if (
Op.isInterpSlot() ||
Op.isInterpAttr() ||
9597 Op.isInterpAttrChan()) {
9599 }
else if (
Op.isImmModifier()) {
9600 OptionalIdx[
Op.getImmTy()] =
I;
9608 AMDGPUOperand::ImmTyHigh);
9612 AMDGPUOperand::ImmTyClamp);
9616 AMDGPUOperand::ImmTyOModSI);
9621 AMDGPUOperand::ImmTyOpSel);
9622 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9625 cvtOpSelHelper(Inst, OpSel);
9629void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst,
const OperandVector &Operands)
9631 OptionalImmIndexMap OptionalIdx;
9636 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9637 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9640 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9641 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9643 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9644 }
else if (
Op.isImmModifier()) {
9645 OptionalIdx[
Op.getImmTy()] =
I;
9653 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9663 cvtOpSelHelper(Inst, OpSel);
9666void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9668 OptionalImmIndexMap OptionalIdx;
9671 int CbszOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::cbsz);
9675 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J)
9676 static_cast<AMDGPUOperand &
>(*Operands[
I++]).addRegOperands(Inst, 1);
9678 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9679 AMDGPUOperand &
Op =
static_cast<AMDGPUOperand &
>(*Operands[
I]);
9684 if (NumOperands == CbszOpIdx) {
9689 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9690 }
else if (
Op.isImmModifier()) {
9691 OptionalIdx[
Op.getImmTy()] =
I;
9693 Op.addRegOrImmOperands(Inst, 1);
9698 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9699 if (CbszIdx != OptionalIdx.end()) {
9700 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).
getImm();
9704 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::blgp);
9705 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9706 if (BlgpIdx != OptionalIdx.end()) {
9707 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).
getImm();
9718 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9719 if (OpselIdx != OptionalIdx.end()) {
9720 OpSel =
static_cast<const AMDGPUOperand &
>(*Operands[OpselIdx->second])
9724 unsigned OpSelHi = 0;
9725 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9726 if (OpselHiIdx != OptionalIdx.end()) {
9727 OpSelHi =
static_cast<const AMDGPUOperand &
>(*Operands[OpselHiIdx->second])
9730 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9731 AMDGPU::OpName::src1_modifiers};
9733 for (
unsigned J = 0; J < 2; ++J) {
9734 unsigned ModVal = 0;
9735 if (OpSel & (1 << J))
9737 if (OpSelHi & (1 << J))
9740 const int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9745void AMDGPUAsmParser::cvtVOP3(MCInst &Inst,
const OperandVector &Operands,
9746 OptionalImmIndexMap &OptionalIdx) {
9751 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
9752 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
9755 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
9756 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
9758 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9759 }
else if (
Op.isImmModifier()) {
9760 OptionalIdx[
Op.getImmTy()] =
I;
9762 Op.addRegOrImmOperands(Inst, 1);
9768 AMDGPUOperand::ImmTyScaleSel);
9772 AMDGPUOperand::ImmTyClamp);
9778 AMDGPUOperand::ImmTyByteSel);
9783 AMDGPUOperand::ImmTyOModSI);
9790 auto *it = Inst.
begin();
9791 std::advance(it, AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers));
9799void AMDGPUAsmParser::cvtVOP3(MCInst &Inst,
const OperandVector &Operands) {
9800 OptionalImmIndexMap OptionalIdx;
9801 cvtVOP3(Inst, Operands, OptionalIdx);
9804void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
const OperandVector &Operands,
9805 OptionalImmIndexMap &OptIdx) {
9811 if (
Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9812 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9813 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9814 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9815 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
9816 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
9817 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9818 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12 ||
9819 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx13 ||
9820 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx13) {
9829 int VdstInIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
9830 if (VdstInIdx != -1 && VdstInIdx ==
static_cast<int>(Inst.
getNumOperands()))
9833 int BitOp3Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::bitop3);
9834 if (BitOp3Idx != -1) {
9841 int OpSelIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel);
9842 if (OpSelIdx != -1) {
9846 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::op_sel_hi);
9847 if (OpSelHiIdx != -1) {
9848 int DefaultVal =
IsPacked ? -1 : 0;
9854 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_fmt);
9855 if (MatrixAFMTIdx != -1) {
9857 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9861 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_fmt);
9862 if (MatrixBFMTIdx != -1) {
9864 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9867 int MatrixAScaleIdx =
9868 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale);
9869 if (MatrixAScaleIdx != -1) {
9871 AMDGPUOperand::ImmTyMatrixAScale, 0);
9874 int MatrixBScaleIdx =
9875 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale);
9876 if (MatrixBScaleIdx != -1) {
9878 AMDGPUOperand::ImmTyMatrixBScale, 0);
9881 int MatrixAScaleFmtIdx =
9882 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9883 if (MatrixAScaleFmtIdx != -1) {
9885 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9888 int MatrixBScaleFmtIdx =
9889 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9890 if (MatrixBScaleFmtIdx != -1) {
9892 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9897 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9901 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9903 int NegLoIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_lo);
9907 int NegHiIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::neg_hi);
9911 const AMDGPU::OpName
Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9912 AMDGPU::OpName::src2};
9913 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9914 AMDGPU::OpName::src1_modifiers,
9915 AMDGPU::OpName::src2_modifiers};
9918 unsigned OpSelHi = 0;
9925 if (OpSelHiIdx != -1)
9934 for (
int J = 0; J < 3; ++J) {
9935 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc,
Ops[J]);
9939 int ModIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
9944 uint32_t ModVal = 0;
9947 if (SrcOp.
isReg() && getMRI()
9954 if ((OpSel & (1 << J)) != 0)
9958 if ((OpSelHi & (1 << J)) != 0)
9961 if ((NegLo & (1 << J)) != 0)
9964 if ((NegHi & (1 << J)) != 0)
9971void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
const OperandVector &Operands) {
9972 OptionalImmIndexMap OptIdx;
9973 cvtVOP3(Inst, Operands, OptIdx);
9974 cvtVOP3P(Inst, Operands, OptIdx);
9978 unsigned i,
unsigned Opc,
9980 if (AMDGPU::getNamedOperandIdx(
Opc,
OpName) != -1)
9981 ((AMDGPUOperand &)*
Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9983 ((AMDGPUOperand &)*
Operands[i]).addRegOperands(Inst, 1);
9986void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst,
const OperandVector &Operands) {
9989 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9992 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9993 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1);
9995 OptionalImmIndexMap OptIdx;
9996 for (
unsigned i = 5; i < Operands.
size(); ++i) {
9997 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[i]);
9998 OptIdx[
Op.getImmTy()] = i;
10003 AMDGPUOperand::ImmTyIndexKey8bit);
10007 AMDGPUOperand::ImmTyIndexKey16bit);
10011 AMDGPUOperand::ImmTyIndexKey32bit);
10016 cvtVOP3P(Inst, Operands, OptIdx);
10023ParseStatus AMDGPUAsmParser::parseVOPD(
OperandVector &Operands) {
10028 SMLoc S = getLoc();
10031 Operands.
push_back(AMDGPUOperand::CreateToken(
this,
"::", S));
10032 SMLoc OpYLoc = getLoc();
10035 Operands.
push_back(AMDGPUOperand::CreateToken(
this, OpYName, OpYLoc));
10038 return Error(OpYLoc,
"expected a VOPDY instruction after ::");
10044void AMDGPUAsmParser::cvtVOPD(MCInst &Inst,
const OperandVector &Operands) {
10047 auto addOp = [&](uint16_t ParsedOprIdx) {
10048 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
10050 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10054 Op.addRegOperands(Inst, 1);
10058 Op.addImmOperands(Inst, 1);
10070 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
10074 const auto &CInfo = InstInfo[CompIdx];
10075 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
10076 for (
unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
10077 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
10078 if (CInfo.hasSrc2Acc())
10079 addOp(CInfo.getIndexOfDstInParsedOperands());
10083 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::bitop3);
10084 if (BitOp3Idx != -1) {
10085 OptionalImmIndexMap OptIdx;
10086 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands.
back());
10088 OptIdx[
Op.getImmTy()] = Operands.
size() - 1;
10098bool AMDGPUOperand::isDPP8()
const {
10099 return isImmTy(ImmTyDPP8);
10102bool AMDGPUOperand::isDPPCtrl()
const {
10103 using namespace AMDGPU::DPP;
10105 bool result = isImm() && getImmTy() == ImmTyDppCtrl &&
isUInt<9>(
getImm());
10108 return (
Imm >= DppCtrl::QUAD_PERM_FIRST &&
Imm <= DppCtrl::QUAD_PERM_LAST) ||
10109 (
Imm >= DppCtrl::ROW_SHL_FIRST &&
Imm <= DppCtrl::ROW_SHL_LAST) ||
10110 (
Imm >= DppCtrl::ROW_SHR_FIRST &&
Imm <= DppCtrl::ROW_SHR_LAST) ||
10111 (
Imm >= DppCtrl::ROW_ROR_FIRST &&
Imm <= DppCtrl::ROW_ROR_LAST) ||
10112 (
Imm == DppCtrl::WAVE_SHL1) ||
10113 (
Imm == DppCtrl::WAVE_ROL1) ||
10114 (
Imm == DppCtrl::WAVE_SHR1) ||
10115 (
Imm == DppCtrl::WAVE_ROR1) ||
10116 (
Imm == DppCtrl::ROW_MIRROR) ||
10117 (
Imm == DppCtrl::ROW_HALF_MIRROR) ||
10118 (
Imm == DppCtrl::BCAST15) ||
10119 (
Imm == DppCtrl::BCAST31) ||
10120 (
Imm >= DppCtrl::ROW_SHARE_FIRST &&
Imm <= DppCtrl::ROW_SHARE_LAST) ||
10121 (
Imm >= DppCtrl::ROW_XMASK_FIRST &&
Imm <= DppCtrl::ROW_XMASK_LAST);
10130bool AMDGPUOperand::isBLGP()
const {
10134bool AMDGPUOperand::isS16Imm()
const {
10138bool AMDGPUOperand::isU16Imm()
const {
10146bool AMDGPUAsmParser::parseDimId(
unsigned &Encoding) {
10151 SMLoc Loc =
getToken().getEndLoc();
10152 Token = std::string(getTokenStr());
10154 if (getLoc() != Loc)
10159 if (!parseId(Suffix))
10163 StringRef DimId = Token;
10174ParseStatus AMDGPUAsmParser::parseDim(
OperandVector &Operands) {
10178 SMLoc S = getLoc();
10184 SMLoc Loc = getLoc();
10185 if (!parseDimId(Encoding))
10186 return Error(Loc,
"invalid dim value");
10188 Operands.
push_back(AMDGPUOperand::CreateImm(
this, Encoding, S,
10189 AMDGPUOperand::ImmTyDim));
10197ParseStatus AMDGPUAsmParser::parseDPP8(
OperandVector &Operands) {
10198 SMLoc S = getLoc();
10207 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
10210 for (
size_t i = 0; i < 8; ++i) {
10214 SMLoc Loc = getLoc();
10215 if (getParser().parseAbsoluteExpression(Sels[i]))
10217 if (0 > Sels[i] || 7 < Sels[i])
10218 return Error(Loc,
"expected a 3-bit value");
10221 if (!skipToken(
AsmToken::RBrac,
"expected a closing square bracket"))
10225 for (
size_t i = 0; i < 8; ++i)
10226 DPP8 |= (Sels[i] << (i * 3));
10228 Operands.
push_back(AMDGPUOperand::CreateImm(
this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10233AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10235 if (Ctrl ==
"row_newbcast")
10238 if (Ctrl ==
"row_share" ||
10239 Ctrl ==
"row_xmask")
10242 if (Ctrl ==
"wave_shl" ||
10243 Ctrl ==
"wave_shr" ||
10244 Ctrl ==
"wave_rol" ||
10245 Ctrl ==
"wave_ror" ||
10246 Ctrl ==
"row_bcast")
10249 return Ctrl ==
"row_mirror" ||
10250 Ctrl ==
"row_half_mirror" ||
10251 Ctrl ==
"quad_perm" ||
10252 Ctrl ==
"row_shl" ||
10253 Ctrl ==
"row_shr" ||
10258AMDGPUAsmParser::parseDPPCtrlPerm() {
10261 if (!skipToken(
AsmToken::LBrac,
"expected an opening square bracket"))
10265 for (
int i = 0; i < 4; ++i) {
10270 SMLoc Loc = getLoc();
10271 if (getParser().parseAbsoluteExpression(Temp))
10273 if (Temp < 0 || Temp > 3) {
10274 Error(Loc,
"expected a 2-bit value");
10278 Val += (Temp << i * 2);
10281 if (!skipToken(
AsmToken::RBrac,
"expected a closing square bracket"))
10288AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10289 using namespace AMDGPU::DPP;
10294 SMLoc Loc = getLoc();
10296 if (getParser().parseAbsoluteExpression(Val))
10299 struct DppCtrlCheck {
10305 DppCtrlCheck
Check = StringSwitch<DppCtrlCheck>(Ctrl)
10306 .Case(
"wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10307 .Case(
"wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10308 .Case(
"wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10309 .Case(
"wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10310 .Case(
"row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10311 .Case(
"row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10312 .Case(
"row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10313 .Case(
"row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10314 .Case(
"row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10315 .Case(
"row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10319 if (
Check.Ctrl == -1) {
10320 Valid = (
Ctrl ==
"row_bcast" && (Val == 15 || Val == 31));
10328 Error(Loc, Twine(
"invalid ", Ctrl) + Twine(
" value"));
10335ParseStatus AMDGPUAsmParser::parseDPPCtrl(
OperandVector &Operands) {
10336 using namespace AMDGPU::DPP;
10339 !isSupportedDPPCtrl(getTokenStr(), Operands))
10342 SMLoc S = getLoc();
10348 if (Ctrl ==
"row_mirror") {
10349 Val = DppCtrl::ROW_MIRROR;
10350 }
else if (Ctrl ==
"row_half_mirror") {
10351 Val = DppCtrl::ROW_HALF_MIRROR;
10354 if (Ctrl ==
"quad_perm") {
10355 Val = parseDPPCtrlPerm();
10357 Val = parseDPPCtrlSel(Ctrl);
10366 AMDGPUOperand::CreateImm(
this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10370void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst,
const OperandVector &Operands,
10372 OptionalImmIndexMap OptionalIdx;
10379 int OldIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::old);
10381 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers);
10382 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10386 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10387 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10391 int VdstInIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
10392 bool IsVOP3CvtSrDpp =
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10393 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx13 ||
10394 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10395 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx13 ||
10396 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10397 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx13 ||
10398 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
10399 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx13;
10401 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10405 if (OldIdx == NumOperands) {
10407 constexpr int DST_IDX = 0;
10409 }
else if (Src2ModIdx == NumOperands) {
10419 if (IsVOP3CvtSrDpp) {
10428 if (TiedTo != -1) {
10433 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10435 if (IsDPP8 &&
Op.isDppFI()) {
10438 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10439 }
else if (
Op.isReg()) {
10440 Op.addRegOperands(Inst, 1);
10441 }
else if (
Op.isImm() &&
10443 Op.addImmOperands(Inst, 1);
10444 }
else if (
Op.isImm()) {
10445 OptionalIdx[
Op.getImmTy()] =
I;
10453 AMDGPUOperand::ImmTyClamp);
10459 AMDGPUOperand::ImmTyByteSel);
10466 cvtVOP3P(Inst, Operands, OptionalIdx);
10468 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10475 using namespace llvm::AMDGPU::DPP;
10485 AMDGPUOperand::ImmTyDppFI);
10489void AMDGPUAsmParser::cvtDPP(MCInst &Inst,
const OperandVector &Operands,
bool IsDPP8) {
10490 OptionalImmIndexMap OptionalIdx;
10494 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10495 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10499 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10502 if (TiedTo != -1) {
10507 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10509 if (
Op.isReg() && validateVccOperand(
Op.getReg())) {
10517 Op.addImmOperands(Inst, 1);
10519 Op.addRegWithFPInputModsOperands(Inst, 2);
10520 }
else if (
Op.isDppFI()) {
10522 }
else if (
Op.isReg()) {
10523 Op.addRegOperands(Inst, 1);
10529 Op.addRegWithFPInputModsOperands(Inst, 2);
10530 }
else if (
Op.isReg()) {
10531 Op.addRegOperands(Inst, 1);
10532 }
else if (
Op.isDPPCtrl()) {
10533 Op.addImmOperands(Inst, 1);
10534 }
else if (
Op.isImm()) {
10536 OptionalIdx[
Op.getImmTy()] =
I;
10544 using namespace llvm::AMDGPU::DPP;
10552 AMDGPUOperand::ImmTyDppFI);
10561ParseStatus AMDGPUAsmParser::parseSDWASel(
OperandVector &Operands,
10563 AMDGPUOperand::ImmTy
Type) {
10564 return parseStringOrIntWithPrefix(
10566 {
"BYTE_0",
"BYTE_1",
"BYTE_2",
"BYTE_3",
"WORD_0",
"WORD_1",
"DWORD"},
10570ParseStatus AMDGPUAsmParser::parseSDWADstUnused(
OperandVector &Operands) {
10571 return parseStringOrIntWithPrefix(
10572 Operands,
"dst_unused", {
"UNUSED_PAD",
"UNUSED_SEXT",
"UNUSED_PRESERVE"},
10573 AMDGPUOperand::ImmTySDWADstUnused);
10576void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst,
const OperandVector &Operands) {
10577 cvtSDWA(Inst, Operands, SDWAInstType::VOP1);
10580void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst,
const OperandVector &Operands) {
10581 cvtSDWA(Inst, Operands, SDWAInstType::VOP2);
10584void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst,
const OperandVector &Operands) {
10585 cvtSDWA(Inst, Operands, SDWAInstType::VOP2,
true,
true);
10588void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst,
const OperandVector &Operands) {
10589 cvtSDWA(Inst, Operands, SDWAInstType::VOP2,
false,
true);
10592void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst,
const OperandVector &Operands) {
10593 cvtSDWA(Inst, Operands, SDWAInstType::VOPC,
isVI());
10596void AMDGPUAsmParser::cvtSDWA(MCInst &Inst,
const OperandVector &Operands,
10597 SDWAInstType BasicInstType,
bool SkipDstVcc,
10599 using namespace llvm::AMDGPU::SDWA;
10601 OptionalImmIndexMap OptionalIdx;
10602 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10603 bool SkippedVcc =
false;
10607 for (
unsigned J = 0; J <
Desc.getNumDefs(); ++J) {
10608 ((AMDGPUOperand &)*Operands[
I++]).addRegOperands(Inst, 1);
10611 for (
unsigned E = Operands.
size();
I !=
E; ++
I) {
10612 AMDGPUOperand &
Op = ((AMDGPUOperand &)*Operands[
I]);
10613 if (SkipVcc && !SkippedVcc &&
Op.isReg() &&
10614 (
Op.getReg() == AMDGPU::VCC ||
Op.getReg() == AMDGPU::VCC_LO)) {
10620 if (BasicInstType == SDWAInstType::VOP2 &&
10626 if (BasicInstType == SDWAInstType::VOPC && Inst.
getNumOperands() == 0) {
10632 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10633 }
else if (
Op.isImm()) {
10635 OptionalIdx[
Op.getImmTy()] =
I;
10639 SkippedVcc =
false;
10643 if (
Opc != AMDGPU::V_NOP_sdwa_gfx10 &&
Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10644 Opc != AMDGPU::V_NOP_sdwa_vi) {
10646 switch (BasicInstType) {
10647 case SDWAInstType::VOP1:
10650 AMDGPUOperand::ImmTyClamp, 0);
10654 AMDGPUOperand::ImmTyOModSI, 0);
10658 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10662 AMDGPUOperand::ImmTySDWADstUnused,
10663 DstUnused::UNUSED_PRESERVE);
10665 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10668 case SDWAInstType::VOP2:
10670 AMDGPUOperand::ImmTyClamp, 0);
10675 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10676 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10677 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10678 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10681 case SDWAInstType::VOPC:
10684 AMDGPUOperand::ImmTyClamp, 0);
10685 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10693 if (Inst.
getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10694 Inst.
getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10695 auto *it = Inst.
begin();
10697 it, AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::src2));
10709#define GET_MATCHER_IMPLEMENTATION
10710#define GET_MNEMONIC_SPELL_CHECKER
10711#define GET_MNEMONIC_CHECKER
10712#include "AMDGPUGenAsmMatcher.inc"
10718 return parseTokenOp(
"addr64",
Operands);
10720 return parseNamedBit(
"done",
Operands, AMDGPUOperand::ImmTyDone,
true);
10722 return parseTokenOp(
"idxen",
Operands);
10724 return parseNamedBit(
"lds",
Operands, AMDGPUOperand::ImmTyLDS,
10727 return parseTokenOp(
"offen",
Operands);
10729 return parseTokenOp(
"off",
Operands);
10730 case MCK_row_95_en:
10731 return parseNamedBit(
"row_en",
Operands, AMDGPUOperand::ImmTyRowEn,
true);
10733 return parseNamedBit(
"gds",
Operands, AMDGPUOperand::ImmTyGDS);
10735 return parseNamedBit(
"tfe",
Operands, AMDGPUOperand::ImmTyTFE);
10737 return tryCustomParseOperand(
Operands, MCK);
10742unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &
Op,
10748 AMDGPUOperand &Operand = (AMDGPUOperand&)
Op;
10751 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10753 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10755 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10757 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10759 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10761 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10763 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10764 case MCK_row_95_en:
10765 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10773 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10775 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10776 case MCK_SOPPBrTarget:
10777 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10778 case MCK_VReg32OrOff:
10779 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10780 case MCK_InterpSlot:
10781 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10782 case MCK_InterpAttr:
10783 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10784 case MCK_InterpAttrChan:
10785 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10787 case MCK_SReg_64_XEXEC:
10797 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10799 return Match_InvalidOperand;
10807ParseStatus AMDGPUAsmParser::parseEndpgm(
OperandVector &Operands) {
10808 SMLoc S = getLoc();
10817 return Error(S,
"expected a 16-bit value");
10820 AMDGPUOperand::CreateImm(
this,
Imm, S, AMDGPUOperand::ImmTyEndpgm));
// Operand-class predicate: reports whether this operand carries the
// ImmTyEndpgm immediate type. The whole decision is delegated to
// isImmTy(ImmTyEndpgm); no range check on the value is performed here.
10824bool AMDGPUOperand::isEndpgm()
const {
return isImmTy(ImmTyEndpgm); }
10830bool AMDGPUOperand::isSplitBarrier()
const {
return isInlinableImm(MVT::i32); }
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
Enums shared between the AMDGPU backend (LLVM) and the ELF linker (LLD) for the .amdgpu....
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
static llvm::Expected< InlineInfo > decode(GsymDataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Loop::LoopBounds::Direction Direction
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static unsigned getNumExpectedArgs(VariantKind Kind)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
static const fltSemantics & BFloat()
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
opStatus
IEEE-754R 7: Default exception handling.
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Look up the symbol with the specified Name.
Instances of this class represent a single low-level machine instruction.
unsigned getNumOperands() const
unsigned getOpcode() const
iterator insert(iterator I, const MCOperand &Op)
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Instances of this class represent operands of the MCInst class.
static MCOperand createExpr(const MCExpr *Val)
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
void setReg(MCRegister Reg)
Set the register number.
MCRegister getReg() const
Returns the register number.
const MCExpr * getExpr() const
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
constexpr bool isValid() const
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
bool isVariable() const
isVariable - Check if this is a variable symbol.
LLVM_ABI void setVariableValue(const MCExpr *Value)
void setRedefinable(bool Value)
Mark this symbol as redefinable.
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
MCTargetAsmParser - Generic interface to target specific assembly parsers.
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
constexpr const char * getPointer() const
constexpr bool isValid() const
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Represent a constant reference to a string, i.e.
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
Get the string size.
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
bool contains(StringRef key) const
Check if the set contains the given key.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
std::pair< iterator, bool > insert(const ValueT &V)
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
FuncInfoFlags
Per-function flags packed into INFO_FLAGS entries.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale, unsigned BFmt, unsigned BScale)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT64
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ UNDEF
UNDEF - An undefined node.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
constexpr bool hasIntClamp(const T &...O)
@ Valid
The data is already valid.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
Context & getContext() const
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
FunctionAddr VTableAddr Value
StringMapEntry< Value * > ValueName
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
LLVM_ABI void PrintError(const Twine &Msg)
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
@ Default
The result value is uniform if and only if all operands are uniform.
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
uint32_t PrivateSegmentSize
SmallVector< std::pair< MCSymbol *, std::string >, 4 > IndirectCalls
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 8 > Calls
SmallVector< FuncInfo, 8 > Funcs
SmallVector< std::pair< MCSymbol *, std::string >, 4 > TypeIds
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 4 > Uses
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
const MCExpr * compute_pgm_rsrc1
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * group_segment_fixed_size
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
const MCExpr * kernel_code_properties
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
uint32_t group_segment_fixed_size
uint32_t private_segment_fixed_size