36#define DEBUG_TYPE "si-memory-legalizer"
37#define PASS_NAME "SI Memory Legalizer"
41 cl::desc(
"Use this to skip inserting cache invalidating instructions."));
63enum class SIAtomicScope {
75enum class SIAtomicAddrSpace {
84 FLAT = GLOBAL |
LDS | SCRATCH,
87 ATOMIC = GLOBAL |
LDS | SCRATCH | GDS,
90 ALL = GLOBAL |
LDS | SCRATCH | GDS | OTHER,
98 case SIAtomicScope::NONE:
100 case SIAtomicScope::SINGLETHREAD:
101 return "singlethread";
102 case SIAtomicScope::WAVEFRONT:
104 case SIAtomicScope::WORKGROUP:
106 case SIAtomicScope::CLUSTER:
108 case SIAtomicScope::AGENT:
110 case SIAtomicScope::SYSTEM:
117 if (AS == SIAtomicAddrSpace::NONE) {
122 if ((AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE)
123 OS <<
LS <<
"global";
124 if ((AS & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE)
126 if ((AS & SIAtomicAddrSpace::SCRATCH) != SIAtomicAddrSpace::NONE)
127 OS <<
LS <<
"scratch";
128 if ((AS & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE)
130 if ((AS & SIAtomicAddrSpace::OTHER) != SIAtomicAddrSpace::NONE)
136class SIMemOpInfo final {
139 friend class SIMemOpAccess;
143 SIAtomicScope Scope = SIAtomicScope::SYSTEM;
144 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
145 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
146 bool IsCrossAddressSpaceOrdering =
false;
147 bool IsVolatile =
false;
148 bool IsNonTemporal =
false;
149 bool IsLastUse =
false;
150 bool IsCooperative =
false;
154 const GCNSubtarget &ST,
156 SIAtomicScope Scope = SIAtomicScope::SYSTEM,
157 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
158 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
159 bool IsCrossAddressSpaceOrdering =
true,
160 AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
161 bool IsVolatile =
false,
bool IsNonTemporal =
false,
162 bool IsLastUse =
false,
bool IsCooperative =
false,
163 bool CanDemoteWorkgroupToWavefront =
false)
164 : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
165 OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
166 IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
167 IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
168 IsLastUse(IsLastUse), IsCooperative(IsCooperative) {
170 if (Ordering == AtomicOrdering::NotAtomic) {
171 assert(!IsCooperative &&
"Cannot be cooperative & non-atomic!");
172 assert(Scope == SIAtomicScope::NONE &&
173 OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
174 !IsCrossAddressSpaceOrdering &&
175 FailureOrdering == AtomicOrdering::NotAtomic);
179 assert(Scope != SIAtomicScope::NONE &&
180 (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
181 SIAtomicAddrSpace::NONE &&
182 (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
183 SIAtomicAddrSpace::NONE);
188 if ((OrderingAddrSpace == InstrAddrSpace) &&
190 this->IsCrossAddressSpaceOrdering =
false;
194 if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
195 SIAtomicAddrSpace::NONE) {
196 this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
197 }
else if ((InstrAddrSpace &
198 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
199 SIAtomicAddrSpace::NONE) {
200 this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
201 }
else if ((InstrAddrSpace &
202 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
203 SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
204 this->Scope = std::min(Scope, SIAtomicScope::AGENT);
209 if (this->Scope == SIAtomicScope::CLUSTER && !
ST.hasClusters())
210 this->Scope = SIAtomicScope::AGENT;
216 if (CanDemoteWorkgroupToWavefront &&
217 this->Scope == SIAtomicScope::WORKGROUP &&
220 AtomicOrdering::Monotonic)))
221 this->Scope = SIAtomicScope::WAVEFRONT;
227 SIAtomicScope getScope()
const {
240 return FailureOrdering;
245 SIAtomicAddrSpace getInstrAddrSpace()
const {
246 return InstrAddrSpace;
251 SIAtomicAddrSpace getOrderingAddrSpace()
const {
252 return OrderingAddrSpace;
257 bool getIsCrossAddressSpaceOrdering()
const {
258 return IsCrossAddressSpaceOrdering;
263 bool isVolatile()
const {
269 bool isNonTemporal()
const {
270 return IsNonTemporal;
275 bool isLastUse()
const {
return IsLastUse; }
278 bool isCooperative()
const {
return IsCooperative; }
282 bool isAtomic()
const {
283 return Ordering != AtomicOrdering::NotAtomic;
288class SIMemOpAccess final {
290 const AMDGPUMachineModuleInfo *MMI =
nullptr;
291 const GCNSubtarget &ST;
292 const bool CanDemoteWorkgroupToWavefront;
296 const char *Msg)
const;
302 std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
303 toSIAtomicScope(
SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace)
const;
306 SIAtomicAddrSpace toSIAtomicAddrSpace(
unsigned AS)
const;
310 std::optional<SIMemOpInfo>
316 SIMemOpAccess(
const AMDGPUMachineModuleInfo &MMI,
const GCNSubtarget &ST,
320 std::optional<SIMemOpInfo>
325 std::optional<SIMemOpInfo>
330 std::optional<SIMemOpInfo>
335 std::optional<SIMemOpInfo>
341 std::optional<SIMemOpInfo>
345class SICacheControl {
349 const GCNSubtarget &ST;
352 const SIInstrInfo *TII =
nullptr;
359 SICacheControl(
const GCNSubtarget &ST);
364 unsigned Bits)
const;
368 bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS)
const;
374 static std::unique_ptr<SICacheControl> create(
const GCNSubtarget &ST);
381 SIAtomicAddrSpace AddrSpace)
const = 0;
388 SIAtomicAddrSpace AddrSpace)
const = 0;
395 SIAtomicAddrSpace AddrSpace)
const = 0;
401 SIAtomicAddrSpace AddrSpace,
402 SIMemOp
Op,
bool IsVolatile,
404 bool IsLastUse =
false)
const = 0;
411 virtual bool finalizeStore(MachineInstr &
MI,
bool Atomic)
const {
416 virtual bool handleCooperativeAtomic(MachineInstr &
MI)
const {
418 "cooperative atomics are not available on this architecture");
431 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
432 bool IsCrossAddrSpaceOrdering, Position Pos,
442 SIAtomicAddrSpace AddrSpace,
443 Position Pos)
const = 0;
451 SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace,
452 Position Pos)
const = 0;
457 SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
458 Position Pos)
const {
459 bool Changed = insertWriteback(
MI, Scope, AddrSpace, Pos);
460 Changed |= insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
461 IsCrossAddrSpaceOrdering, Pos,
462 AtomicOrdering::Release,
false);
468 virtual bool handleNonVolatile(MachineInstr &
MI)
const {
return false; }
471 virtual ~SICacheControl() =
default;
476class SIGfx6CacheControl final :
public SICacheControl {
479 SIGfx6CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {}
483 SIAtomicAddrSpace AddrSpace)
const override;
487 SIAtomicAddrSpace AddrSpace)
const override;
491 SIAtomicAddrSpace AddrSpace)
const override;
494 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
495 bool IsVolatile,
bool IsNonTemporal,
496 bool IsLastUse)
const override;
499 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
500 bool IsCrossAddrSpaceOrdering, Position Pos,
505 SIAtomicAddrSpace AddrSpace,
506 Position Pos)
const override;
509 SIAtomicAddrSpace AddrSpace,
510 Position Pos)
const override;
514class SIGfx10CacheControl final :
public SICacheControl {
516 SIGfx10CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {}
520 SIAtomicAddrSpace AddrSpace)
const override;
524 SIAtomicAddrSpace AddrSpace)
const override {
530 SIAtomicAddrSpace AddrSpace)
const override {
535 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
536 bool IsVolatile,
bool IsNonTemporal,
537 bool IsLastUse)
const override;
540 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
541 bool IsCrossAddrSpaceOrdering, Position Pos,
545 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
548 SIAtomicAddrSpace AddrSpace,
549 Position Pos)
const override {
554class SIGfx12CacheControl final :
public SICacheControl {
576 SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace)
const;
579 SIGfx12CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {
582 assert(!
ST.hasGFX1250Insts() ||
ST.hasGFX13Insts() ||
ST.isCuModeEnabled());
586 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
587 bool IsCrossAddrSpaceOrdering, Position Pos,
591 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
594 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
595 bool IsVolatile,
bool IsNonTemporal,
596 bool IsLastUse)
const override;
598 bool finalizeStore(MachineInstr &
MI,
bool Atomic)
const override;
600 bool handleCooperativeAtomic(MachineInstr &
MI)
const override;
603 SIAtomicAddrSpace AddrSpace,
604 Position Pos)
const override;
608 SIAtomicAddrSpace AddrSpace)
const override {
609 return setAtomicScope(
MI, Scope, AddrSpace);
614 SIAtomicAddrSpace AddrSpace)
const override {
615 return setAtomicScope(
MI, Scope, AddrSpace);
620 SIAtomicAddrSpace AddrSpace)
const override {
621 return setAtomicScope(
MI, Scope, AddrSpace);
624 bool handleNonVolatile(MachineInstr &
MI)
const override;
627class SIMemoryLegalizer final {
629 const MachineModuleInfo &MMI;
631 std::unique_ptr<SICacheControl> CC =
nullptr;
634 std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
638 bool isAtomicRet(
const MachineInstr &
MI)
const {
644 bool removeAtomicPseudoMIs();
648 bool expandLoad(
const SIMemOpInfo &MOI,
652 bool expandStore(
const SIMemOpInfo &MOI,
656 bool expandAtomicFence(
const SIMemOpInfo &MOI,
660 bool expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
667 SIMemoryLegalizer(
const MachineModuleInfo &MMI) : MMI(MMI) {};
668 bool run(MachineFunction &MF);
675 SIMemoryLegalizerLegacy() : MachineFunctionPass(ID) {}
677 void getAnalysisUsage(AnalysisUsage &AU)
const override {
682 StringRef getPassName()
const override {
686 bool runOnMachineFunction(MachineFunction &MF)
override;
690 {
"global", SIAtomicAddrSpace::GLOBAL},
691 {
"local", SIAtomicAddrSpace::LDS},
699 OS <<
"unknown address space '" << AS <<
"'; expected one of ";
701 for (
const auto &[Name, Val] : ASNames)
702 OS <<
LS <<
'\'' <<
Name <<
'\'';
710static std::optional<SIAtomicAddrSpace>
712 static constexpr StringLiteral FenceASPrefix =
"amdgpu-synchronize-as";
718 SIAtomicAddrSpace
Result = SIAtomicAddrSpace::NONE;
719 for (
const auto &[Prefix, Suffix] : MMRA) {
720 if (Prefix != FenceASPrefix)
723 if (
auto It = ASNames.find(Suffix); It != ASNames.end())
726 diagnoseUnknownMMRAASName(
MI, Suffix);
729 if (Result == SIAtomicAddrSpace::NONE)
738 const char *Msg)
const {
740 Func.getContext().diagnose(
741 DiagnosticInfoUnsupported(Func, Msg,
MI->getDebugLoc()));
744std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
746 SIAtomicAddrSpace InstrAddrSpace)
const {
748 return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC,
true);
750 return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC,
true);
752 return std::tuple(SIAtomicScope::CLUSTER, SIAtomicAddrSpace::ATOMIC,
true);
754 return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
757 return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
760 return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
763 return std::tuple(SIAtomicScope::SYSTEM,
764 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
766 return std::tuple(SIAtomicScope::AGENT,
767 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
769 return std::tuple(SIAtomicScope::CLUSTER,
770 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
772 return std::tuple(SIAtomicScope::WORKGROUP,
773 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
775 return std::tuple(SIAtomicScope::WAVEFRONT,
776 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
778 return std::tuple(SIAtomicScope::SINGLETHREAD,
779 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
783SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(
unsigned AS)
const {
785 return SIAtomicAddrSpace::FLAT;
787 return SIAtomicAddrSpace::GLOBAL;
789 return SIAtomicAddrSpace::LDS;
791 return SIAtomicAddrSpace::SCRATCH;
793 return SIAtomicAddrSpace::GDS;
796 return SIAtomicAddrSpace::GLOBAL;
798 return SIAtomicAddrSpace::OTHER;
804SIMemOpAccess::SIMemOpAccess(
const AMDGPUMachineModuleInfo &MMI_,
805 const GCNSubtarget &ST,
const Function &
F)
806 : MMI(&MMI_),
ST(
ST),
807 CanDemoteWorkgroupToWavefront(
ST.isSingleWavefrontWorkgroup(
F)) {}
809std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
811 assert(
MI->getNumMemOperands() > 0);
816 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
817 bool IsNonTemporal =
true;
819 bool IsLastUse =
false;
820 bool IsCooperative =
false;
824 for (
const auto &MMO :
MI->memoperands()) {
825 IsNonTemporal &= MMO->isNonTemporal();
827 IsLastUse |= MMO->getFlags() &
MOLastUse;
830 toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
832 if (OpOrdering != AtomicOrdering::NotAtomic) {
833 const auto &IsSyncScopeInclusion =
835 if (!IsSyncScopeInclusion) {
836 reportUnsupported(
MI,
837 "Unsupported non-inclusive atomic synchronization scope");
841 SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
843 assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
844 MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
855 Ordering = AtomicOrdering::Monotonic;
857 SIAtomicScope
Scope = SIAtomicScope::NONE;
858 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
859 bool IsCrossAddressSpaceOrdering =
false;
860 if (Ordering != AtomicOrdering::NotAtomic) {
861 auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
863 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
866 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
868 if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
869 ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
870 ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
871 reportUnsupported(
MI,
"Unsupported atomic address space");
875 return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
876 IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
877 IsNonTemporal, IsLastUse, IsCooperative,
878 CanDemoteWorkgroupToWavefront);
881std::optional<SIMemOpInfo>
885 if (!(
MI->mayLoad() && !
MI->mayStore()))
889 if (
MI->getNumMemOperands() == 0)
890 return SIMemOpInfo(ST);
892 return constructFromMIWithMMO(
MI);
895std::optional<SIMemOpInfo>
899 if (!(!
MI->mayLoad() &&
MI->mayStore()))
903 if (
MI->getNumMemOperands() == 0)
904 return SIMemOpInfo(ST);
906 return constructFromMIWithMMO(
MI);
909std::optional<SIMemOpInfo>
913 if (
MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
920 auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
922 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
926 SIAtomicScope
Scope = SIAtomicScope::NONE;
927 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
928 bool IsCrossAddressSpaceOrdering =
false;
929 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
932 if (OrderingAddrSpace != SIAtomicAddrSpace::ATOMIC) {
937 reportUnsupported(
MI,
"Unsupported atomic address space");
941 auto SynchronizeAS = getSynchronizeAddrSpaceMD(*
MI);
943 OrderingAddrSpace = *SynchronizeAS;
945 return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace,
946 SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
947 AtomicOrdering::NotAtomic,
false,
false,
false,
false,
948 CanDemoteWorkgroupToWavefront);
951std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
955 if (!(
MI->mayLoad() &&
MI->mayStore()))
959 if (
MI->getNumMemOperands() == 0)
960 return SIMemOpInfo(ST);
962 return constructFromMIWithMMO(
MI);
965std::optional<SIMemOpInfo>
972 return constructFromMIWithMMO(
MI);
980 if (
MI.getNumMemOperands() == 0)
983 return MMO->getFlags() & (MOThreadPrivate | MachineMemOperand::MOInvariant);
987SICacheControl::SICacheControl(
const GCNSubtarget &ST) :
ST(
ST) {
988 TII =
ST.getInstrInfo();
994 unsigned Bits)
const {
995 MachineOperand *CPol =
TII->getNamedOperand(*
MI, AMDGPU::OpName::cpol);
999 CPol->setImm(
CPol->getImm() | Bits);
1003bool SICacheControl::canAffectGlobalAddrSpace(SIAtomicAddrSpace AS)
const {
1004 assert((!
ST.hasGloballyAddressableScratch() ||
1005 (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ||
1006 (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
1007 "scratch instructions should already be replaced by flat "
1008 "instructions if GloballyAddressableScratch is enabled");
1009 return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
1013std::unique_ptr<SICacheControl> SICacheControl::create(
const GCNSubtarget &ST) {
1014 GCNSubtarget::Generation Generation =
ST.getGeneration();
1015 if (Generation < AMDGPUSubtarget::GFX10)
1016 return std::make_unique<SIGfx6CacheControl>(ST);
1017 if (Generation < AMDGPUSubtarget::GFX12)
1018 return std::make_unique<SIGfx10CacheControl>(ST);
1019 return std::make_unique<SIGfx12CacheControl>(ST);
1022bool SIGfx6CacheControl::enableLoadCacheBypass(
1024 SIAtomicScope Scope,
1025 SIAtomicAddrSpace AddrSpace)
const {
1028 if (!canAffectGlobalAddrSpace(AddrSpace)) {
1040 case SIAtomicScope::SYSTEM:
1041 if (
ST.hasGFX940Insts()) {
1047 case SIAtomicScope::AGENT:
1048 if (
ST.hasGFX940Insts()) {
1057 case SIAtomicScope::WORKGROUP:
1058 if (
ST.hasGFX940Insts()) {
1065 }
else if (
ST.hasGFX90AInsts()) {
1070 if (
ST.isTgSplitEnabled())
1074 case SIAtomicScope::WAVEFRONT:
1075 case SIAtomicScope::SINGLETHREAD:
1085bool SIGfx6CacheControl::enableStoreCacheBypass(
1087 SIAtomicScope Scope,
1088 SIAtomicAddrSpace AddrSpace)
const {
1096 if (
ST.hasGFX940Insts() && canAffectGlobalAddrSpace(AddrSpace)) {
1098 case SIAtomicScope::SYSTEM:
1102 case SIAtomicScope::AGENT:
1106 case SIAtomicScope::WORKGROUP:
1110 case SIAtomicScope::WAVEFRONT:
1111 case SIAtomicScope::SINGLETHREAD:
1129bool SIGfx6CacheControl::enableRMWCacheBypass(
1131 SIAtomicScope Scope,
1132 SIAtomicAddrSpace AddrSpace)
const {
1142 if (
ST.hasGFX940Insts() && canAffectGlobalAddrSpace(AddrSpace)) {
1144 case SIAtomicScope::SYSTEM:
1148 case SIAtomicScope::AGENT:
1149 case SIAtomicScope::WORKGROUP:
1150 case SIAtomicScope::WAVEFRONT:
1151 case SIAtomicScope::SINGLETHREAD:
1165bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
1167 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1177 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1182 if (
ST.hasGFX940Insts()) {
1185 }
else if (
Op == SIMemOp::LOAD) {
1197 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1198 Position::AFTER, AtomicOrdering::Unordered,
1204 if (IsNonTemporal) {
1205 if (
ST.hasGFX940Insts()) {
1219 SIAtomicScope Scope,
1220 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
1221 bool IsCrossAddrSpaceOrdering, Position Pos,
1223 bool AtomicsOnly)
const {
1226 MachineBasicBlock &
MBB = *
MI->getParent();
1229 if (Pos == Position::AFTER)
1233 if (
ST.hasGFX90AInsts() &&
ST.isTgSplitEnabled()) {
1241 if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
1242 SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
1243 (Scope == SIAtomicScope::WORKGROUP)) {
1245 Scope = SIAtomicScope::AGENT;
1249 AddrSpace &= ~SIAtomicAddrSpace
::LDS;
1253 bool LGKMCnt =
false;
1255 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1256 SIAtomicAddrSpace::NONE) {
1258 case SIAtomicScope::SYSTEM:
1259 case SIAtomicScope::AGENT:
1262 case SIAtomicScope::WORKGROUP:
1263 case SIAtomicScope::WAVEFRONT:
1264 case SIAtomicScope::SINGLETHREAD:
1273 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1275 case SIAtomicScope::SYSTEM:
1276 case SIAtomicScope::AGENT:
1277 case SIAtomicScope::WORKGROUP:
1284 LGKMCnt |= IsCrossAddrSpaceOrdering;
1286 case SIAtomicScope::WAVEFRONT:
1287 case SIAtomicScope::SINGLETHREAD:
1296 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1298 case SIAtomicScope::SYSTEM:
1299 case SIAtomicScope::AGENT:
1306 LGKMCnt |= IsCrossAddrSpaceOrdering;
1308 case SIAtomicScope::WORKGROUP:
1309 case SIAtomicScope::WAVEFRONT:
1310 case SIAtomicScope::SINGLETHREAD:
1319 if (VMCnt || LGKMCnt) {
1320 unsigned WaitCntImmediate =
1326 .
addImm(WaitCntImmediate);
1334 Scope == SIAtomicScope::WORKGROUP &&
1335 (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1340 if (Pos == Position::AFTER)
1349 return !ST.isAmdPalOS() && !ST.isMesa3DOS();
1353 SIAtomicScope Scope,
1354 SIAtomicAddrSpace AddrSpace,
1355 Position Pos)
const {
1356 if (!InsertCacheInv)
1361 MachineBasicBlock &
MBB = *
MI->getParent();
1364 if (Pos == Position::AFTER)
1368 ? AMDGPU::BUFFER_WBINVL1_VOL
1369 : AMDGPU::BUFFER_WBINVL1;
1371 if (canAffectGlobalAddrSpace(AddrSpace)) {
1373 case SIAtomicScope::SYSTEM:
1374 if (
ST.hasGFX940Insts()) {
1390 if (
ST.hasGFX90AInsts()) {
1405 case SIAtomicScope::AGENT:
1406 if (
ST.hasGFX940Insts()) {
1421 case SIAtomicScope::WORKGROUP:
1422 if (
ST.isTgSplitEnabled()) {
1423 if (
ST.hasGFX940Insts()) {
1442 }
else if (
ST.hasGFX90AInsts()) {
1448 case SIAtomicScope::WAVEFRONT:
1449 case SIAtomicScope::SINGLETHREAD:
1466 if (Pos == Position::AFTER)
1473 SIAtomicScope Scope,
1474 SIAtomicAddrSpace AddrSpace,
1475 Position Pos)
const {
1476 if (!
ST.hasGFX90AInsts())
1480 MachineBasicBlock &
MBB = *
MI->getParent();
1483 if (Pos == Position::AFTER)
1486 if (canAffectGlobalAddrSpace(AddrSpace)) {
1488 case SIAtomicScope::SYSTEM:
1500 case SIAtomicScope::AGENT:
1501 if (
ST.hasGFX940Insts()) {
1508 case SIAtomicScope::WORKGROUP:
1509 case SIAtomicScope::WAVEFRONT:
1510 case SIAtomicScope::SINGLETHREAD:
1520 if (Pos == Position::AFTER)
1526bool SIGfx10CacheControl::enableLoadCacheBypass(
1528 SIAtomicAddrSpace AddrSpace)
const {
1532 if (canAffectGlobalAddrSpace(AddrSpace)) {
1534 case SIAtomicScope::SYSTEM:
1535 case SIAtomicScope::AGENT:
1542 case SIAtomicScope::WORKGROUP:
1547 if (!
ST.isCuModeEnabled())
1550 case SIAtomicScope::WAVEFRONT:
1551 case SIAtomicScope::SINGLETHREAD:
1569bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
1571 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1582 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1590 if (
Op == SIMemOp::LOAD) {
1603 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1604 Position::AFTER, AtomicOrdering::Unordered,
1609 if (IsNonTemporal) {
1614 if (
Op == SIMemOp::STORE)
1629 SIAtomicScope Scope,
1630 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
1631 bool IsCrossAddrSpaceOrdering,
1633 bool AtomicsOnly)
const {
1636 MachineBasicBlock &
MBB = *
MI->getParent();
1639 if (Pos == Position::AFTER)
1644 bool LGKMCnt =
false;
1646 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1647 SIAtomicAddrSpace::NONE) {
1649 case SIAtomicScope::SYSTEM:
1650 case SIAtomicScope::AGENT:
1651 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1653 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1656 case SIAtomicScope::WORKGROUP:
1666 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1668 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1672 case SIAtomicScope::WAVEFRONT:
1673 case SIAtomicScope::SINGLETHREAD:
1682 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1684 case SIAtomicScope::SYSTEM:
1685 case SIAtomicScope::AGENT:
1686 case SIAtomicScope::WORKGROUP:
1693 LGKMCnt |= IsCrossAddrSpaceOrdering;
1695 case SIAtomicScope::WAVEFRONT:
1696 case SIAtomicScope::SINGLETHREAD:
1705 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1707 case SIAtomicScope::SYSTEM:
1708 case SIAtomicScope::AGENT:
1715 LGKMCnt |= IsCrossAddrSpaceOrdering;
1717 case SIAtomicScope::WORKGROUP:
1718 case SIAtomicScope::WAVEFRONT:
1719 case SIAtomicScope::SINGLETHREAD:
1728 if (VMCnt || LGKMCnt) {
1729 unsigned WaitCntImmediate =
1735 .
addImm(WaitCntImmediate);
1743 Scope == SIAtomicScope::WORKGROUP &&
1744 (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1751 .
addReg(AMDGPU::SGPR_NULL, RegState::Undef)
1756 if (Pos == Position::AFTER)
1763 SIAtomicScope Scope,
1764 SIAtomicAddrSpace AddrSpace,
1765 Position Pos)
const {
1766 if (!InsertCacheInv)
1771 MachineBasicBlock &
MBB = *
MI->getParent();
1774 if (Pos == Position::AFTER)
1777 if (canAffectGlobalAddrSpace(AddrSpace)) {
1779 case SIAtomicScope::SYSTEM:
1780 case SIAtomicScope::AGENT:
1788 case SIAtomicScope::WORKGROUP:
1793 if (!
ST.isCuModeEnabled()) {
1798 case SIAtomicScope::WAVEFRONT:
1799 case SIAtomicScope::SINGLETHREAD:
1814 if (Pos == Position::AFTER)
1822 MachineOperand *
CPol =
TII->getNamedOperand(*
MI, OpName::cpol);
1837 MachineOperand *
CPol =
TII->getNamedOperand(*
MI, OpName::cpol);
1850bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
1854 MachineBasicBlock &
MBB = *
MI->getParent();
1858 if (
ST.hasImageInsts()) {
1869 SIAtomicScope Scope,
1870 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
1871 bool IsCrossAddrSpaceOrdering,
1873 bool AtomicsOnly)
const {
1876 MachineBasicBlock &
MBB = *
MI->getParent();
1879 bool LOADCnt =
false;
1881 bool STORECnt =
false;
1883 if (Pos == Position::AFTER)
1886 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1887 SIAtomicAddrSpace::NONE) {
1889 case SIAtomicScope::SYSTEM:
1890 case SIAtomicScope::AGENT:
1891 case SIAtomicScope::CLUSTER:
1892 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1894 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1897 case SIAtomicScope::WORKGROUP:
1914 if (!
ST.isCuModeEnabled() ||
ST.hasGFX1250Insts() ||
1916 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1918 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1922 case SIAtomicScope::WAVEFRONT:
1923 case SIAtomicScope::SINGLETHREAD:
1932 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1934 case SIAtomicScope::SYSTEM:
1935 case SIAtomicScope::AGENT:
1936 case SIAtomicScope::CLUSTER:
1937 case SIAtomicScope::WORKGROUP:
1944 DSCnt |= IsCrossAddrSpaceOrdering;
1946 case SIAtomicScope::WAVEFRONT:
1947 case SIAtomicScope::SINGLETHREAD:
1968 if (!AtomicsOnly &&
ST.hasImageInsts()) {
1986 if (Pos == Position::AFTER)
1993 SIAtomicScope Scope,
1994 SIAtomicAddrSpace AddrSpace,
1995 Position Pos)
const {
1996 if (!InsertCacheInv)
1999 MachineBasicBlock &
MBB = *
MI->getParent();
2008 if (!canAffectGlobalAddrSpace(AddrSpace))
2013 case SIAtomicScope::SYSTEM:
2016 case SIAtomicScope::AGENT:
2019 case SIAtomicScope::CLUSTER:
2022 case SIAtomicScope::WORKGROUP:
2030 if (
ST.isCuModeEnabled())
2035 case SIAtomicScope::WAVEFRONT:
2036 case SIAtomicScope::SINGLETHREAD:
2043 if (Pos == Position::AFTER)
2048 if (Pos == Position::AFTER)
2053 if (
ST.hasINVWBL2WaitCntRequirement() && Scope > SIAtomicScope::CLUSTER) {
2054 insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD,
2055 false, Pos, AtomicOrdering::Acquire,
2058 if (Pos == Position::AFTER)
2066 SIAtomicScope Scope,
2067 SIAtomicAddrSpace AddrSpace,
2068 Position Pos)
const {
2073 if (!canAffectGlobalAddrSpace(AddrSpace))
2077 MachineBasicBlock &
MBB = *
MI->getParent();
2080 if (Pos == Position::AFTER)
2089 std::optional<AMDGPU::CPol::CPol> NeedsWB;
2091 case SIAtomicScope::SYSTEM:
2094 case SIAtomicScope::AGENT:
2096 if (
ST.hasGFX1250Insts())
2099 case SIAtomicScope::CLUSTER:
2100 case SIAtomicScope::WORKGROUP:
2101 case SIAtomicScope::WAVEFRONT:
2102 case SIAtomicScope::SINGLETHREAD:
2104 case SIAtomicScope::NONE:
2113 if (
ST.hasINVWBL2WaitCntRequirement()) {
2114 insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
2116 AtomicOrdering::Release,
2124 if (Pos == Position::AFTER)
2130bool SIGfx12CacheControl::handleNonVolatile(MachineInstr &
MI)
const {
2132 if (!
ST.hasGFX1250Insts())
2134 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
2141bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
2143 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
2152 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
2159 }
else if (IsNonTemporal) {
2167 if (
ST.requiresWaitXCntForSingleAccessInstructions() &&
2169 MachineBasicBlock &
MBB = *
MI->getParent();
2179 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
2180 Position::AFTER, AtomicOrdering::Unordered,
2187bool SIGfx12CacheControl::finalizeStore(MachineInstr &
MI,
bool Atomic)
const {
2188 assert(
MI.mayStore() &&
"Not a Store inst");
2189 const bool IsRMW = (
MI.mayLoad() &&
MI.mayStore());
2192 if (Atomic &&
ST.requiresWaitXCntForSingleAccessInstructions() &&
2194 MachineBasicBlock &
MBB = *
MI.getParent();
2203 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
2209 if (
ST.requiresWaitsBeforeSystemScopeStores() && !Atomic &&
2211 Changed |= insertWaitsBeforeSystemScopeStore(
MI.getIterator());
2216bool SIGfx12CacheControl::handleCooperativeAtomic(MachineInstr &
MI)
const {
2217 if (!
ST.hasGFX1250Insts())
2221 MachineOperand *
CPol =
TII->getNamedOperand(
MI, OpName::cpol);
2222 assert(CPol &&
"No CPol operand?");
2230 SIAtomicScope Scope,
2231 SIAtomicAddrSpace AddrSpace)
const {
2234 if (canAffectGlobalAddrSpace(AddrSpace)) {
2236 case SIAtomicScope::SYSTEM:
2239 case SIAtomicScope::AGENT:
2242 case SIAtomicScope::CLUSTER:
2245 case SIAtomicScope::WORKGROUP:
2248 if (!
ST.isCuModeEnabled())
2251 case SIAtomicScope::WAVEFRONT:
2252 case SIAtomicScope::SINGLETHREAD:
2270bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
2271 if (AtomicPseudoMIs.empty())
2274 for (
auto &
MI : AtomicPseudoMIs)
2275 MI->eraseFromParent();
2277 AtomicPseudoMIs.clear();
2281bool SIMemoryLegalizer::expandLoad(
const SIMemOpInfo &MOI,
2289 if (MOI.isAtomic()) {
2291 <<
", scope=" <<
toString(MOI.getScope())
2292 <<
", ordering-AS=" << MOI.getOrderingAddrSpace()
2293 <<
", instr-AS=" << MOI.getInstrAddrSpace() <<
"\n");
2295 if (Order == AtomicOrdering::Monotonic ||
2296 Order == AtomicOrdering::Acquire ||
2297 Order == AtomicOrdering::SequentiallyConsistent) {
2298 Changed |= CC->enableLoadCacheBypass(
MI, MOI.getScope(),
2299 MOI.getOrderingAddrSpace());
2304 if (MOI.isCooperative())
2305 Changed |= CC->handleCooperativeAtomic(*
MI);
2307 if (Order == AtomicOrdering::SequentiallyConsistent)
2308 Changed |= CC->insertWait(
MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
2309 SIMemOp::LOAD | SIMemOp::STORE,
2310 MOI.getIsCrossAddressSpaceOrdering(),
2311 Position::BEFORE, Order,
false);
2313 if (Order == AtomicOrdering::Acquire ||
2314 Order == AtomicOrdering::SequentiallyConsistent) {
2317 CC->insertWait(
MI, MOI.getScope(), MOI.getInstrAddrSpace(),
2318 SIMemOp::LOAD, MOI.getIsCrossAddressSpaceOrdering(),
2319 Position::AFTER, Order,
true);
2320 Changed |= CC->insertAcquire(
MI, MOI.getScope(),
2321 MOI.getOrderingAddrSpace(),
2331 Changed |= CC->enableVolatileAndOrNonTemporal(
2332 MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
2333 MOI.isNonTemporal(), MOI.isLastUse());
2338bool SIMemoryLegalizer::expandStore(
const SIMemOpInfo &MOI,
2346 MachineInstr &StoreMI = *
MI;
2348 if (MOI.isAtomic()) {
2350 <<
", scope=" <<
toString(MOI.getScope())
2351 <<
", ordering-AS=" << MOI.getOrderingAddrSpace()
2352 <<
", instr-AS=" << MOI.getInstrAddrSpace() <<
"\n");
2353 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2354 MOI.getOrdering() == AtomicOrdering::Release ||
2355 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2356 Changed |= CC->enableStoreCacheBypass(
MI, MOI.getScope(),
2357 MOI.getOrderingAddrSpace());
2362 if (MOI.isCooperative())
2363 Changed |= CC->handleCooperativeAtomic(*
MI);
2365 if (MOI.getOrdering() == AtomicOrdering::Release ||
2366 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2367 Changed |= CC->insertRelease(
MI, MOI.getScope(),
2368 MOI.getOrderingAddrSpace(),
2369 MOI.getIsCrossAddressSpaceOrdering(),
2372 Changed |= CC->finalizeStore(StoreMI,
true);
2379 Changed |= CC->enableVolatileAndOrNonTemporal(
2380 MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
2381 MOI.isNonTemporal());
2385 Changed |= CC->finalizeStore(StoreMI,
false);
/// Applies the memory-model legalization steps for the ATOMIC_FENCE pseudo
/// referenced by MI, driven by the scope, ordering and ordering address space
/// recorded in \p MOI. Each step ORs its result into Changed.
// NOTE(review): this listing skips several original lines (the embedded
// numbering jumps and closing braces are absent), so the comments below
// describe only the statements that are visible.
2389bool SIMemoryLegalizer::expandAtomicFence(
const SIMemOpInfo &MOI,
// This function must only be called on ATOMIC_FENCE instructions.
2391 assert(
MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
// Record the fence in AtomicPseudoMIs; presumably it is erased later by
// removeAtomicPseudoMIs() — confirm against that function.
2395 AtomicPseudoMIs.push_back(
MI);
2398 const SIAtomicAddrSpace OrderingAddrSpace = MOI.getOrderingAddrSpace();
2400 if (MOI.isAtomic()) {
2402 <<
", scope=" <<
toString(MOI.getScope())
2403 <<
", ordering-AS=" << OrderingAddrSpace <<
"\n");
// A pure acquire fence requests a wait covering both loads and stores,
// positioned BEFORE the fence.
2405 if (Order == AtomicOrdering::Acquire) {
2407 Changed |= CC->insertWait(
MI, MOI.getScope(), OrderingAddrSpace,
2408 SIMemOp::LOAD | SIMemOp::STORE,
2409 MOI.getIsCrossAddressSpaceOrdering(),
2410 Position::BEFORE, Order,
true);
// Release, acquire-release and sequentially consistent fences request a
// release (the trailing arguments of this call are not visible in the listing).
2413 if (Order == AtomicOrdering::Release ||
2414 Order == AtomicOrdering::AcquireRelease ||
2415 Order == AtomicOrdering::SequentiallyConsistent)
2423 Changed |= CC->insertRelease(
MI, MOI.getScope(), OrderingAddrSpace,
2424 MOI.getIsCrossAddressSpaceOrdering(),
// Acquire, acquire-release and sequentially consistent fences request an
// acquire (the trailing arguments of this call are not visible in the listing).
2432 if (Order == AtomicOrdering::Acquire ||
2433 Order == AtomicOrdering::AcquireRelease ||
2434 Order == AtomicOrdering::SequentiallyConsistent)
2435 Changed |= CC->insertAcquire(
MI, MOI.getScope(), OrderingAddrSpace,
/// Applies the memory-model legalization steps for the atomic cmpxchg / RMW
/// instruction referenced by MI, driven by \p MOI. Both the success ordering
/// (Order) and MOI.getFailureOrdering() influence which steps are requested.
/// Each step ORs its result into Changed.
// NOTE(review): this listing skips several original lines (the embedded
// numbering jumps and closing braces are absent), so the comments below
// describe only the statements that are visible.
2444bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
// Keep a reference to the instruction itself; it is what finalizeStore()
// below receives.
2451 MachineInstr &RMWMI = *
MI;
2453 if (MOI.isAtomic()) {
2455 <<
", failure-ordering="
2457 <<
", scope=" <<
toString(MOI.getScope())
2458 <<
", ordering-AS=" << MOI.getOrderingAddrSpace()
2459 <<
", instr-AS=" << MOI.getInstrAddrSpace() <<
"\n");
// For any of the listed orderings, request RMW cache bypass. Note that this
// call is keyed on the instruction address space, not the ordering address
// space.
2461 if (Order == AtomicOrdering::Monotonic ||
2462 Order == AtomicOrdering::Acquire || Order == AtomicOrdering::Release ||
2463 Order == AtomicOrdering::AcquireRelease ||
2464 Order == AtomicOrdering::SequentiallyConsistent) {
2465 Changed |= CC->enableRMWCacheBypass(
MI, MOI.getScope(),
2466 MOI.getInstrAddrSpace());
// A release is requested when the success ordering is release or stronger,
// or when the failure ordering is sequentially consistent.
2469 if (Order == AtomicOrdering::Release ||
2470 Order == AtomicOrdering::AcquireRelease ||
2471 Order == AtomicOrdering::SequentiallyConsistent ||
2472 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
2473 Changed |= CC->insertRelease(
MI, MOI.getScope(),
2474 MOI.getOrderingAddrSpace(),
2475 MOI.getIsCrossAddressSpaceOrdering(),
// Acquire handling is requested when either the success or the failure
// ordering has acquire semantics (acquire, acquire-release or seq_cst).
2478 if (Order == AtomicOrdering::Acquire ||
2479 Order == AtomicOrdering::AcquireRelease ||
2480 Order == AtomicOrdering::SequentiallyConsistent ||
2481 MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
2482 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
// Wait positioned AFTER the instruction; the waited-on operation kind is LOAD
// when isAtomicRet(*MI) holds and STORE otherwise.
2485 CC->insertWait(
MI, MOI.getScope(), MOI.getInstrAddrSpace(),
2486 isAtomicRet(*
MI) ? SIMemOp::LOAD : SIMemOp::STORE,
2487 MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER,
2489 Changed |= CC->insertAcquire(
MI, MOI.getScope(),
2490 MOI.getOrderingAddrSpace(),
// finalizeStore() is passed `true` on this path; presumably the flag marks
// the access as atomic — confirm against SICacheControl::finalizeStore.
2494 Changed |= CC->finalizeStore(RMWMI,
true);
/// Handles the LDS DMA instruction referenced by MI using \p MOI.
/// The visible return path forwards the volatile, nontemporal and last-use
/// flags of \p MOI, together with the instruction address space and OpKind,
/// to CC->enableVolatileAndOrNonTemporal() and returns that call's result.
// NOTE(review): the rest of the signature and the computation of OpKind fall
// on original lines that this listing omits.
2501bool SIMemoryLegalizer::expandLDSDMA(
const SIMemOpInfo &MOI,
2515 return CC->enableVolatileAndOrNonTemporal(
2516 MI, MOI.getInstrAddrSpace(), OpKind, MOI.isVolatile(),
2517 MOI.isNonTemporal(), MOI.isLastUse());
/// Entry point of the SIMemoryLegalizerLegacy pass for one machine function.
/// Obtains the MachineModuleInfo from the MachineModuleInfoWrapperPass
/// analysis, constructs a SIMemoryLegalizer from it and returns the result of
/// running that legalizer on \p MF.
2520bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) {
2521 const MachineModuleInfo &MMI =
2522 getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
2523 return SIMemoryLegalizer(MMI).run(MF);
2530 .getCachedResult<MachineModuleAnalysis>(
2532 assert(MMI &&
"MachineModuleAnalysis must be available");
2533 if (!SIMemoryLegalizer(MMI->getMMI()).run(MF))
2544 CC = SICacheControl::create(ST);
2546 for (
auto &
MBB : MF) {
2550 if (
MI->isBundle() &&
MI->mayLoadOrStore()) {
2553 I != E &&
I->isBundledWithPred(); ++
I) {
2554 I->unbundleFromPred();
2557 MO.setIsInternalRead(
false);
2560 MI =
MI->eraseFromParent();
2564 if (
const auto &MOI = MOA.getLoadInfo(
MI))
2566 else if (
const auto &MOI = MOA.getStoreInfo(
MI))
2568 else if (
const auto &MOI = MOA.getLDSDMAInfo(
MI))
2570 else if (
const auto &MOI = MOA.getAtomicFenceInfo(
MI))
2572 else if (
const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(
MI))
2573 Changed |= expandAtomicCmpxchgOrRmw(*MOI,
MI);
2581 Changed |= removeAtomicPseudoMIs();
// Out-of-class definition of the SIMemoryLegalizerLegacy::ID member, set to 0.
// NOTE(review): presumably only the address of this member is used to
// identify the pass, not its value — confirm against the pass registration.
2587char SIMemoryLegalizerLegacy::
ID = 0;
2591 return new SIMemoryLegalizerLegacy();
static std::optional< LoadInfo > getLoadInfo(const MachineInstr &MI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
Provides AMDGPU specific target descriptions.
AMDGPU Machine Module Info.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
This header defines various interfaces for pass management in LLVM.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< bool > AmdgcnSkipCacheInvalidations("amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden, cl::desc("Use this to skip inserting cache invalidating instructions."))
static bool isNonVolatileMemoryAccess(const MachineInstr &MI)
static bool canUseBUFFER_WBINVL1_VOL(const GCNSubtarget &ST)
static const uint32_t IV[8]
SyncScope::ID getWorkgroupSSID() const
SyncScope::ID getWavefrontSSID() const
SyncScope::ID getAgentSSID() const
SyncScope::ID getClusterOneAddressSpaceSSID() const
SyncScope::ID getClusterSSID() const
std::optional< bool > isSyncScopeInclusion(SyncScope::ID A, SyncScope::ID B) const
In the AMDGPU target, synchronization scopes are inclusive, meaning a larger synchronization scope is incl...
SyncScope::ID getAgentOneAddressSpaceSSID() const
SyncScope::ID getSingleThreadOneAddressSpaceSSID() const
SyncScope::ID getWavefrontOneAddressSpaceSSID() const
SyncScope::ID getSystemOneAddressSpaceSSID() const
SyncScope::ID getWorkgroupOneAddressSpaceSSID() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
Diagnostic information for unsupported feature in backend.
FunctionPass class - This class is used to implement most global optimizations.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
A helper class to return the specified delimiter string after the first invocation of operator String...
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
A description of a memory reference used in the backend.
Ty & getObjFileInfo()
Keep track of various per-module pieces of information for backends that would like to do so.
MachineOperand class - Representation of each machine instruction operand.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
static bool isVMEM(const MachineInstr &MI)
static bool mayWriteLDSThroughDMA(const MachineInstr &MI)
static bool isBUF(const MachineInstr &MI)
static bool isAtomicRet(const MachineInstr &MI)
static bool isAtomic(const MachineInstr &MI)
static bool isLDSDMA(const MachineInstr &MI)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Represent a constant reference to a string, i.e.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BUFFER_STRIDED_POINTER
Address space for 192-bit fat buffer pointers with an additional index.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ BUFFER_FAT_POINTER
Address space for 160-bit buffer fat pointers.
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
bool isGFX10(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded)
unsigned getVmcntBitMask(const IsaVersion &Version)
unsigned getLgkmcntBitMask(const IsaVersion &Version)
unsigned getExpcntBitMask(const IsaVersion &Version)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
@ System
Synchronized with respect to all concurrently executing threads.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & SIMemoryLegalizerID
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
bool isReleaseOrStronger(AtomicOrdering AO)
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
const char * toIRString(AtomicOrdering ao)
String used by LLVM IR to represent atomic ordering.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
AtomicOrdering getMergedAtomicOrdering(AtomicOrdering AO, AtomicOrdering Other)
Return a single atomic ordering that is at least as strong as both the AO and Other orderings for an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
AtomicOrdering
Atomic ordering for LLVM's memory model.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
FunctionPass * createSIMemoryLegalizerPass()
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...