class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)

  using Ptr = std::unique_ptr<AMDGPUOperand>;
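    // Source-modifier flags: Abs/Neg are the floating-point modifiers, Sext is
    // the integer sign-extension modifier.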
    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {

    int64_t getIntModifiersOperand() const {

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      }
  mutable ImmKindTy Kind;

  bool isToken() const override {
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }
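  // An immediate is either inlinable (encoded directly in the instruction) or
  // must be emitted as a separate 32-bit literal; the two predicates below
  // make that distinction for a given operand type.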
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  bool isRegOrImmWithInt16InputMods() const {

  bool isRegOrImmWithInt32InputMods() const {

  bool isRegOrImmWithInt64InputMods() const {

  bool isRegOrImmWithFP16InputMods() const {

  bool isRegOrImmWithFP32InputMods() const {

  bool isRegOrImmWithFP64InputMods() const {

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

    return isClampSI() || isOModSI();
  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {

  bool isSSrcB64() const {
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {

  bool isSSrcV2FP32() const {

  bool isSCSrcV2FP32() const {

  bool isSSrcV2INT32() const {

  bool isSCSrcV2INT32() const {

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }
  bool isKImmFP32() const {

  bool isKImmFP16() const {

  bool isMem() const override {

  bool isExpr() const {

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

    return S->getSymbol().getName();

    return getExpressionAsToken();
  int64_t getImm() const {

  void setImm(int64_t Val) {

  ImmTy getImmTy() const {

  unsigned getReg() const override {

  SMLoc getStartLoc() const override {

  SMLoc getEndLoc() const override {

    return SMRange(StartLoc, EndLoc);

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
      addRegOperands(Inst, N);
      addImmOperands(Inst, N);

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
      addRegOperands(Inst, N);
      addImmOperands(Inst, N, false);

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    addRegOperands(Inst, N);

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
      addImmOperands(Inst, N);
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';

    OS << '<' << getImm();
    if (getImmTy() != ImmTyNone) {
      OS << " type: "; printImmTy(OS, getImmTy());
    OS << " mods: " << Imm.Mods << '>';

    OS << '\'' << getToken() << '\'';

    OS << "<expr " << *Expr << '>';
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
        MCSymbol *const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
        MCSymbol *const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));

  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);

  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned &Num, unsigned &Width);
  unsigned getRegularReg(RegisterKind RegKind,

  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };

  enum OperandMode {
    OperandMode_Default,
  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
        KernelScope.initialize(getContext());
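  // Subtarget feature queries used throughout the parser.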
  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

    return AMDGPU::isSI(getSTI());

    return AMDGPU::isCI(getSTI());

    return AMDGPU::isVI(getSTI());

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;
  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,

  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

  struct OperandInfoTy {
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;

  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;

struct OptionalOperand {
  AMDGPUOperand::ImmTy Type;
  bool (*ConvertResult)(int64_t&);

} // end anonymous namespace
// May be called with an integer type of equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}
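// Pick the FP semantics for a TableGen'd operand type so that floating-point
// literals can be converted to the width the instruction actually encodes.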
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)

    // TODO: We should avoid using host float here. It would be better to
    // check the float bit values which is what a few other places do.
    // We've had bot failures before due to weird NaN support on mips hosts.
                                            AsmParser->hasInv2PiInlineImm());

    if (type.getScalarSizeInBits() == 16) {
          static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
          type, AsmParser->hasInv2PiInlineImm());

        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());

                                         AsmParser->hasInv2PiInlineImm());

  if (type.getScalarSizeInBits() == 16) {
        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        type, AsmParser->hasInv2PiInlineImm());

      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {

  unsigned Size = type.getSizeInBits();

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
  else if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
}

bool AMDGPUOperand::isSDWAFP16Operand() const {

bool AMDGPUOperand::isSDWAFP32Operand() const {

bool AMDGPUOperand::isSDWAInt16Operand() const {

bool AMDGPUOperand::isSDWAInt32Operand() const {

bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const {
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
    addLiteralImmOperand(Inst, Imm.Val,
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
    assert(!isImmTy(ImmTyNone) || !hasModifiers());

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());

  if (ApplyModifiers) {
    Val = applyInputFPModifiers(Val, Size);
  }

  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

                                 AsmParser->hasInv2PiInlineImm())) {

      if (Literal.getLoBits(32) != 0) {
        const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");
      }

      setImmKindLiteral();

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      setImmKindLiteral();

                                 AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();
      setImmKindLiteral();

                                 AsmParser->hasInv2PiInlineImm())) {
      setImmKindLiteral();

                                 AsmParser->hasInv2PiInlineImm()));
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {

  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
  case AMDGPU::SRC_VCCZ:
  case AMDGPU::SRC_EXECZ:
  case AMDGPU::SRC_SCC:
  case AMDGPU::SGPR_NULL:
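// The switches below map a register kind and a width in 32-bit registers to
// the corresponding *RegClassID (e.g. a width of 2 VGPRs selects VReg_64).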
  if (Is == IS_VGPR) {
    switch (RegWidth) {
    case 1: return AMDGPU::VGPR_32RegClassID;
    case 2: return AMDGPU::VReg_64RegClassID;
    case 3: return AMDGPU::VReg_96RegClassID;
    case 4: return AMDGPU::VReg_128RegClassID;
    case 5: return AMDGPU::VReg_160RegClassID;
    case 6: return AMDGPU::VReg_192RegClassID;
    case 8: return AMDGPU::VReg_256RegClassID;
    case 16: return AMDGPU::VReg_512RegClassID;
    case 32: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
    case 1: return AMDGPU::TTMP_32RegClassID;
    case 2: return AMDGPU::TTMP_64RegClassID;
    case 4: return AMDGPU::TTMP_128RegClassID;
    case 8: return AMDGPU::TTMP_256RegClassID;
    case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
    case 1: return AMDGPU::SGPR_32RegClassID;
    case 2: return AMDGPU::SGPR_64RegClassID;
    case 3: return AMDGPU::SGPR_96RegClassID;
    case 4: return AMDGPU::SGPR_128RegClassID;
    case 5: return AMDGPU::SGPR_160RegClassID;
    case 6: return AMDGPU::SGPR_192RegClassID;
    case 8: return AMDGPU::SGPR_256RegClassID;
    case 16: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
    case 1: return AMDGPU::AGPR_32RegClassID;
    case 2: return AMDGPU::AReg_64RegClassID;
    case 3: return AMDGPU::AReg_96RegClassID;
    case 4: return AMDGPU::AReg_128RegClassID;
    case 5: return AMDGPU::AReg_160RegClassID;
    case 6: return AMDGPU::AReg_192RegClassID;
    case 8: return AMDGPU::AReg_256RegClassID;
    case 16: return AMDGPU::AReg_512RegClassID;
    case 32: return AMDGPU::AReg_1024RegClassID;
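// Well-known special register names accepted by the assembler, mapped to the
// corresponding physical registers.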
      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R) return true;

  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
  return ParseRegister(RegNo, StartLoc, EndLoc, false);

  ParseRegister(RegNo, StartLoc, EndLoc, true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,

  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {

  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
    Reg = AMDGPU::FLAT_SCR;

  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
    Reg = AMDGPU::XNACK_MASK;

  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {

  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {

  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {

    Error(Loc, "register does not fit in the list");

    if (Reg1 != Reg + RegWidth) {
      Error(Loc, "registers in a list must have consecutive indices");
  {{"ttmp"}, IS_TTMP},

  return Kind == IS_VGPR ||

AMDGPUAsmParser::isRegister(const AsmToken &Token,

  if (!RegSuffix.empty()) {

AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}
AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,

  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    AlignSize = std::min(RegWidth, 4u);
  }

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;
  }

  unsigned RegIdx = RegNum / AlignSize;
    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;
  }

    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
  }
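// Parse the "[<lo>:<hi>]" index range of a register expression such as
// v[0:3], returning the first index and the width of the range.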
AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &Width) {
  int64_t RegLo, RegHi;

  SMLoc FirstIdxLoc = getLoc();

  if (!parseExpr(RegLo))

    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))

    Error(FirstIdxLoc, "invalid register index");

    Error(SecondIdxLoc, "invalid register index");

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");

  Num = static_cast<unsigned>(RegLo);
  Width = (RegHi - RegLo) + 1;
unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,

    RegKind = IS_SPECIAL;
    Tokens.push_back(getToken());

unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,

  auto Loc = getLoc();

    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;
  }

  Tokens.push_back(getToken());

  if (!RegSuffix.empty()) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    }

    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();
                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;
  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 1) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;
    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;
    if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
      return AMDGPU::NoRegister;
    if (NextRegWidth != 1) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;
                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;
  Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
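// Illustrative note (added commentary, not in the original source): this
// handles the square-bracket list form, e.g. "[s0, s1, s2, s3]", where every
// element must be a single 32-bit register of the same kind and the indices
// must be consecutive.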
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  if (Reg == AMDGPU::NoRegister) {
    assert(Parser.hasPendingError());
  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
      Error(Loc, "register not available on this GPU");

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure) {
  Reg = AMDGPU::NoRegister;
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
    return StringRef(".amdgcn.next_free_vgpr");
    return StringRef(".amdgcn.next_free_sgpr");

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  int64_t NewMax = DwordRegIndex + RegWidth - 1;
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
                  ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
  if (OldCount <= NewMax)
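// Note (added commentary, not in the original source): the assembler tracks
// the highest VGPR/SGPR index seen so far in the ".amdgcn.next_free_vgpr" and
// ".amdgcn.next_free_sgpr" symbols, bumping the stored value whenever a newly
// parsed register extends past the current maximum.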
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
  const auto &Tok = getToken();
  const auto &NextTok = peekToken();
  bool Negate = false;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
      RealVal.changeSign();
        AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));
    if (HasSP3AbsModifier) {
      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
      if (Parser.parseExpression(Expr))
    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));

  if (auto R = parseRegister()) {
  } else if (isModifier()) {
    return parseImm(Operands, HasSP3AbsMod);
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return str == "abs" || str == "neg" || str == "sext";

AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                         const AsmToken &NextToken) const {

AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                   const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);

AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);

AMDGPUAsmParser::isModifier() {
  peekTokens(NextToken);
  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);

AMDGPUAsmParser::parseSP3NegModifier() {
  peekTokens(NextToken);
      (isRegister(NextToken[0], NextToken[1]) ||
       isId(NextToken[0], "abs"))) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
  SP3Neg = parseSP3NegModifier();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
  Abs = trySkipId("abs");
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
  Res = parseRegOrImm(Operands, SP3Abs);
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;
  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);

  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
  AMDGPUOperand::Modifiers Mods;
  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);

  return parseRegOrImmWithFPInputMods(Operands, false);

  return parseRegOrImmWithIntInputMods(Operands, false);
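// Illustrative note (added commentary, not in the original source): the
// input-modifier spellings recognized above are the named forms "abs(v0)",
// "neg(v1)" and "sext(v2)", plus the SP3-style shorthands "|v0|" for abs and
// a leading "-" for neg.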
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
    return Match_InvalidOperand;
      getForcedEncodingSize() != 64)
    return Match_PreferE32;
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
      return Match_InvalidOperand;
  return Match_Success;

  static const unsigned Variants[] = {
  if (getForcedEncodingSize() == 32) {
  if (isForcedVOP3()) {
  if (isForcedSDWA()) {
  if (isForcedDPP()) {

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (getForcedEncodingSize() == 32)
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  for (unsigned i = 0; i < Num; ++i) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
  return AMDGPU::NoRegister;

bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  int64_t Val = MO.getImm();

unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:

bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
  const unsigned Opcode = Inst.getOpcode();
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;
    ++ConstantBusUseCount;
  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      break;
    if (usesConstantBus(Inst, OpIdx)) {
      if (!SGPRsUsed.count(LastSGPR)) {
        SGPRsUsed.insert(LastSGPR);
        ++ConstantBusUseCount;
        if (NumLiterals == 0) {
        } else if (LiteralSize != Size) {
  ConstantBusUseCount += NumLiterals;
  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
  Error(Loc, "invalid operand (violates constant bus restrictions)");
AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
  const unsigned Opcode = Inst.getOpcode();
  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1)
      break;
            "destination must be different than all sources");

bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
  if (DMaskIdx == -1 || TFEIdx == -1)
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
    DataSize = (DataSize + 1) / 2;
  return (VDataSize / 4) == DataSize + TFESize;

bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
  assert(SrsrcIdx > VAddr0Idx);
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned VAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
  else if (AddrSize > 4)
  return VAddrSize == AddrSize;

bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;

bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  if (!BaseOpcode->MSAA)
  return DimInfo->MSAA;
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:

bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
  Error(ErrLoc, "source operand must be a VGPR");

bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
        "source operand must be either a VGPR or an inline constant");

bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src2_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {

bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {

bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
  if (Imm < 0 || Imm >= 8)
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  if ((Desc.TSFlags & Enc) == 0)
  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
      return StringRef("lds_direct is not supported on this GPU");
      return StringRef("lds_direct cannot be used with this instruction");
    if (SrcName != OpName::src0)
      return StringRef("lds_direct may be used as src0 only");

    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();

bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if (!hasFlatOffsets() && Op.getImm() != 0) {
          "flat offset modifier is not supported on this GPU");
  if (!isIntN(OffsetSize, Op.getImm())) {
          Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
  if (!isUIntN(OffsetSize, Op.getImm())) {
          Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");

    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset())
      return Op.getStartLoc();

bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
        (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                             : "expected a 21-bit signed offset");
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  const int OpIndices[] = { Src0Idx, Src1Idx };
  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      break;
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
    } else if (MO.isExpr()) {
  return NumLiterals + NumExprs <= 1;

bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
      Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {

bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
         (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);

bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      break;
        getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
            "inline constants are not allowed for this operand");
    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
    } else if (MO.isExpr()) {
  NumLiterals += NumExprs;
  if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
  if (NumLiterals > 1) {
    Error(getLitLoc(Operands), "only one literal operand is allowed");

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
                          : AMDGPU::OpName::vdata;
  if (Data2Areg >= 0 && Data2Areg != DataAreg)
  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
    return DstAreg == DataAreg;
  return DstAreg < 1 && DataAreg < 1;

bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureGFX90AInsts])
    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
    if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
    if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))

bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const SMLoc &IDLoc) {
                                            AMDGPU::OpName::cpol);
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
    Error(IDLoc, "invalid cache policy for SMRD instruction");
    Error(S, "scc is not supported on this GPU");
    Error(IDLoc, "instruction must use glc");
    Error(S, "instruction must not use glc");
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
  if (auto ErrMsg = validateLdsDirect(Inst)) {
  if (!validateSOPLiteral(Inst)) {
          "only one literal operand is allowed");
  if (!validateVOP3Literal(Inst, Operands)) {
  if (!validateConstantBusLimitations(Inst, Operands)) {
  if (!validateEarlyClobberLimitations(Inst, Operands)) {
  if (!validateIntClampSupported(Inst)) {
          "integer clamping is not supported on this GPU");
  if (!validateOpSel(Inst)) {
          "invalid op_sel operand");
  if (!validateMIMGD16(Inst)) {
          "d16 modifier is not supported on this GPU");
  if (!validateMIMGDim(Inst)) {
    Error(IDLoc, "dim modifier is required on this GPU");
  if (!validateMIMGMSAA(Inst)) {
          "invalid dim; must be MSAA type");
  if (!validateMIMGDataSize(Inst)) {
          "image data size does not match dmask and tfe");
  if (!validateMIMGAddrSize(Inst)) {
          "image address size does not match dim and a16");
  if (!validateMIMGAtomicDMask(Inst)) {
          "invalid atomic image dmask");
  if (!validateMIMGGatherDMask(Inst)) {
          "invalid image_gather dmask: only one bit must be set");
  if (!validateMovrels(Inst, Operands)) {
  if (!validateFlatOffset(Inst, Operands)) {
  if (!validateSMEMOffset(Inst, Operands)) {
  if (!validateMAIAccWrite(Inst, Operands)) {
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
            ? "invalid register class: data and dst should be all VGPR or AGPR"
            : "invalid register class: agpr loads and stores not supported on this GPU"
  if (!validateVGPRAlign(Inst)) {
          "invalid register class: vgpr tuples must be 64 bit aligned");
  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
                                unsigned VariantID = 0);

                                unsigned VariantID);

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
  for (auto Variant : Variants) {

bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
  getParser().clearPendingErrors();
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
        " variant of this instruction is not supported"));
  return Error(IDLoc, "instruction not supported on this GPU");
  return Error(IDLoc, "invalid instruction" + Suggestion);
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              bool MatchingInlineAsm) {
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand
                                 && Result != Match_MissingFeature
                                 && Result != Match_PreferE32)) {
      if (R == Match_Success)
  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
  case Match_MissingFeature:
    return Error(IDLoc, "operands are not valid for this GPU or mode");
  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
      return Error(IDLoc, "too few operands for instruction");
      if (ErrorLoc == SMLoc())
    return Error(ErrorLoc, "invalid operand for instruction");
  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
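// Note (added commentary, not in the original source): matching is retried
// once per encoding variant returned by getMatchedVariants(); the loop keeps
// the most specific failure kind so that the diagnostic emitted below reflects
// the closest near-match rather than the first variant tried.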
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  if (getParser().parseAbsoluteExpression(Tmp)) {

bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");
    return TokError("minor version number required, comma expected");
  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
    return TokError("directive only supported for amdgcn architecture");
  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());
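// Illustrative usage (added commentary, not in the original source; the exact
// target-id string depends on the subtarget and its features):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"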
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);

bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  unsigned MaxAddressableNumSGPRs =
  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
    return OutOfRangeError(SGPRRange);
  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
    return OutOfRangeError(SGPRRange);
  if (Features.test(FeatureSGPRInitBug))
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
    return TokError("directive only supported for amdgcn architecture");
    return TokError("directive only supported for amdhsa OS");
  if (getParser().parseIdentifier(KernelName))
  uint64_t NextFreeVGPR = 0;
  uint64_t AccumOffset = 0;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
    if (ID == ".end_amdhsa_kernel")
      return TokError(".amdhsa_ directives cannot be repeated");
    SMLoc ValStart = getLoc();
    if (getParser().parseAbsoluteExpression(IVal))
    SMLoc ValEnd = getLoc();
      return OutOfRangeError(ValRange);
    uint64_t Val = IVal;

#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                          \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                          \
    return OutOfRangeError(RANGE);                                            \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
    if (ID == ".amdhsa_group_segment_fixed_size") {
        return OutOfRangeError(ValRange);
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
        return OutOfRangeError(ValRange);
    } else if (ID == ".amdhsa_kernarg_size") {
        return OutOfRangeError(ValRange);
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      EnableWavefrontSize32 = Val;
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
    } else if (ID == ".amdhsa_accum_offset") {
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(IDRange.Start,
            ".amdhsa_reserve_xnack_mask does not match target id",
    } else if (ID == ".amdhsa_float_round_mode_32") {
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
    } else if (ID == ".amdhsa_dx10_clamp") {
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
    } else if (ID == ".amdhsa_tg_split") {
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
    } else if (ID == ".amdhsa_exception_int_div_zero") {
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
#undef PARSE_BITS_ENTRY

  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
    return OutOfRangeError(VGPRRange);
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
    return OutOfRangeError(SGPRRange);
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");

    if (Seen.find(".amdhsa_accum_offset") == Seen.end())
      return TokError(".amdhsa_accum_offset directive is required");
    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
      return TokError("accum_offset should be in range [4..256] in "
      return TokError("accum_offset exceeds total VGPR allocation");
                    (AccumOffset / 4 - 1));

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
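// Illustrative usage (added commentary, not in the original source; the kernel
// name and register counts are placeholders): a minimal descriptor block
// accepted by this parser looks like
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// where the two .amdhsa_next_free_* directives are mandatory, as enforced
// above.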
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  if (ParseDirectiveMajorMinor(Major, Minor))
  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);

bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
    getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
  if (ParseDirectiveMajorMinor(Major, Minor))
    return TokError("stepping version number required, comma expected");
  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");
    return TokError("vendor name required, comma expected");
  if (!parseString(VendorName, "invalid vendor name"))
    return TokError("arch name required, comma expected");
  if (!parseString(ArchName, "invalid arch name"))
  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
                                                      VendorName, ArchName);
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return TokError(Err.str());
  if (ID == "enable_wavefront_size32") {
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
  if (ID == "wavefront_size") {
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
  if (ID == "enable_wgp_mode") {
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  if (ID == "enable_mem_ordered") {
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  if (ID == "enable_fwd_progress") {
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
    if (ID == ".end_amd_kernel_code_t")
    if (ParseAMDKernelCodeTValue(ID, Header))
  getTargetStreamer().EmitAMDKernelCodeT(Header);

bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  if (!parseId(KernelName, "expected symbol name"))
  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
  KernelScope.initialize(getContext());

bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");
  getTargetStreamer().EmitISAVersion();
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
    return Error(getLoc(),
                 "not available on non-amdhsa OSes")).str());
  std::string HSAMetadataString;
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");
    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");

                                          std::string &CollectString) {
  getLexer().setSkipSpace(false);
  bool FoundEnd = false;
      CollectStream << getTokenStr();
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();
    Parser.eatToEndOfStatement();
  getLexer().setSkipSpace(true);
    return TokError(Twine("expected directive ") +
  CollectStream.flush();

bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  auto PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
    return Error(getLoc(),
                 "not available on non-amdpal OSes")).str());
  auto PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
      return TokError(Twine("expected an even number of values in ") +
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
    PALMetadata->setRegister(Key, Value);
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");
  int64_t Alignment = 4;
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
                 "unexpected token in '.amdgpu_lds' directive"))
  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  if (IDVal == ".amdhsa_kernel")
    return ParseDirectiveAMDHSAKernel();
      return ParseDirectiveHSAMetadata();
  if (IDVal == ".hsa_code_object_version")
    return ParseDirectiveHSACodeObjectVersion();
  if (IDVal == ".hsa_code_object_isa")
    return ParseDirectiveHSACodeObjectISA();
  if (IDVal == ".amd_kernel_code_t")
    return ParseDirectiveAMDKernelCodeT();
  if (IDVal == ".amdgpu_hsa_kernel")
    return ParseDirectiveAMDGPUHsaKernel();
  if (IDVal == ".amd_amdgpu_isa")
    return ParseDirectiveISAVersion();
      return ParseDirectiveHSAMetadata();
  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();
  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();
    return ParseDirectivePALMetadataBegin();
    return ParseDirectivePALMetadata();
    return hasSGPR104_SGPR105();
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
  case AMDGPU::SGPR_NULL:
  case AMDGPU::FLAT_SCR:
  case AMDGPU::FLAT_SCR_LO:
  case AMDGPU::FLAT_SCR_HI:
    return hasSGPR102_SGPR103();
  SMLoc LBraceLoc = getLoc();
    auto Loc = getLoc();
      Error(Loc, "expected a register");
    RBraceLoc = getLoc();
                   "expected a comma or a closing square bracket")) {
    AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
  Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.startswith("image_");
  OperandMode Mode = OperandMode_Default;
    Mode = OperandMode_NSA;
    checkUnsupportedInstruction(Name, NameLoc);
    if (!Parser.hasPendingError()) {
          "not a valid operand.";
      Error(getLoc(), Msg);
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {

                                    AMDGPUOperand::ImmTy ImmTy,
                                    bool (*ConvertResult)(int64_t&)) {
  if (ConvertResult && !ConvertResult(Value)) {
  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));

AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
                                             AMDGPUOperand::ImmTy ImmTy,
                                             bool (*ConvertResult)(int64_t&)) {
  const unsigned MaxSize = 4;
  for (int I = 0; ; ++I) {
    SMLoc Loc = getLoc();
    if (Op != 0 && Op != 1) {
    if (I + 1 == MaxSize) {
      Error(getLoc(), "expected a closing square bracket");
  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
                              AMDGPUOperand::ImmTy ImmTy) {
  if (trySkipId(Name)) {
  } else if (trySkipId("no", Name)) {
    Error(S, "r128 modifier is not supported on this GPU");
    Error(S, "a16 modifier is not supported on this GPU");
  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;
  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  unsigned CPolOn = 0;
  unsigned CPolOff = 0;
  if (trySkipId("glc"))
  else if (trySkipId("noglc"))
  else if (trySkipId("slc"))
  else if (trySkipId("noslc"))
  else if (trySkipId("dlc"))
  else if (trySkipId("nodlc"))
  else if (trySkipId("scc"))
  else if (trySkipId("noscc"))
    Error(S, "dlc modifier is not supported on this GPU");
    Error(S, "scc modifier is not supported on this GPU");
  if (CPolSeen & (CPolOn | CPolOff)) {
    Error(S, "duplicate cache policy modifier");
  CPolSeen |= (CPolOn | CPolOff);
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
                                              AMDGPUOperand::ImmTyCPol));
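// Illustrative usage (added commentary, not in the original source; the
// operands are placeholders): the cache-policy tokens accepted here appear as
// trailing instruction modifiers, e.g.
//   global_load_dword v0, v[2:3], off glc slc dlc
// and each of glc/slc/dlc/scc (or its "no" form) may be given at most once.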
                                 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
                                 AMDGPUOperand::ImmTy ImmT,
                                 int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);

  StringLoc = getLoc();

bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
  SMLoc Loc = getLoc();
  auto Res = parseIntWithPrefix(Pref, Val);
  if (Val < 0 || Val > MaxVal) {
AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  for (int I = 0; I < 2; ++I) {

AMDGPUAsmParser::parseUfmt(int64_t &Format) {
  if (!tryParseFmt("format", UFMT_MAX, Fmt))

bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
    Error(Loc, "unsupported format");

AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
      Error(Loc, "duplicate numeric format");
      Error(Loc, "duplicate data format");
    Error(FormatLoc, "unsupported format");

AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
    Error(Loc, "unified format is not supported on this GPU");

AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
  SMLoc Loc = getLoc();
  if (!parseExpr(Format))
    Error(Loc, "out of range format");

AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  SMLoc Loc = getLoc();
  if (!parseId(FormatStr, "expected a format string"))
  auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
  return parseNumericFormat(Format);
  SMLoc Loc = getLoc();
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
  Res = parseSymbolicOrNumericFormat(Format);
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
    assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Error(getLoc(), "duplicate format");
void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
  OptionalImmIndexMap OptionalIdx;
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      Op.addRegOperands(Inst, 1);
    OptionalIdx[Op.getImmTy()] = i;

                             bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      Op.addRegOperands(Inst, 1);
    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
    OptionalIdx[Op.getImmTy()] = i;
  AMDGPUOperand::ImmTy OffsetType =
      (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
       Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
       Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi)
          ? AMDGPUOperand::ImmTySwizzle
          : AMDGPUOperand::ImmTyOffset;
  if (!IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;
  unsigned OperandIdx[4];
  unsigned EnMask = 0;
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      OperandIdx[SrcIdx] = Inst.size();
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
    if (Op.isToken() && Op.getToken() == "done")
    OptionalIdx[Op.getImmTy()] = i;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {