30#define DEBUG_TYPE "si-memory-legalizer"
31#define PASS_NAME "SI Memory Legalizer"
35 cl::desc(
"Use this to skip inserting cache invalidating instructions."));
57enum class SIAtomicScope {
68enum class SIAtomicAddrSpace {
88class SIMemOpInfo final {
91 friend class SIMemOpAccess;
95 SIAtomicScope
Scope = SIAtomicScope::SYSTEM;
96 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
97 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
98 bool IsCrossAddressSpaceOrdering =
false;
100 bool IsNonTemporal =
false;
101 bool IsLastUse =
false;
105 SIAtomicScope Scope = SIAtomicScope::SYSTEM,
106 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
107 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
108 bool IsCrossAddressSpaceOrdering =
true,
109 AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
110 bool IsVolatile =
false,
bool IsNonTemporal =
false,
111 bool IsLastUse =
false)
112 : Ordering(Ordering), FailureOrdering(FailureOrdering),
Scope(
Scope),
113 OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
114 IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
116 IsLastUse(IsLastUse) {
118 if (Ordering == AtomicOrdering::NotAtomic) {
119 assert(Scope == SIAtomicScope::NONE &&
120 OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
121 !IsCrossAddressSpaceOrdering &&
122 FailureOrdering == AtomicOrdering::NotAtomic);
126 assert(Scope != SIAtomicScope::NONE &&
127 (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
128 SIAtomicAddrSpace::NONE &&
129 (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
130 SIAtomicAddrSpace::NONE);
135 if ((OrderingAddrSpace == InstrAddrSpace) &&
137 this->IsCrossAddressSpaceOrdering =
false;
141 if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
142 SIAtomicAddrSpace::NONE) {
143 this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
144 }
else if ((InstrAddrSpace &
145 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
146 SIAtomicAddrSpace::NONE) {
147 this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
148 }
else if ((InstrAddrSpace &
149 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
150 SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
151 this->Scope = std::min(Scope, SIAtomicScope::AGENT);
171 return FailureOrdering;
176 SIAtomicAddrSpace getInstrAddrSpace()
const {
177 return InstrAddrSpace;
182 SIAtomicAddrSpace getOrderingAddrSpace()
const {
183 return OrderingAddrSpace;
188 bool getIsCrossAddressSpaceOrdering()
const {
189 return IsCrossAddressSpaceOrdering;
194 bool isVolatile()
const {
200 bool isNonTemporal()
const {
201 return IsNonTemporal;
// Returns true if every memory operand of the instruction was tagged with
// the MOLastUse flag (see constructFromMIWithMMO, which ORs MMO flags into
// IsLastUse) — i.e. this is the final read of the value and targets may
// relax caching for it.
206 bool isLastUse()
const {
return IsLastUse; }
210 bool isAtomic()
const {
211 return Ordering != AtomicOrdering::NotAtomic;
216class SIMemOpAccess final {
222 const char *Msg)
const;
228 std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
229 toSIAtomicScope(
SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace)
const;
232 SIAtomicAddrSpace toSIAtomicAddrSpace(
unsigned AS)
const;
236 std::optional<SIMemOpInfo>
245 std::optional<SIMemOpInfo>
250 std::optional<SIMemOpInfo>
255 std::optional<SIMemOpInfo>
260 std::optional<SIMemOpInfo>
264class SICacheControl {
288 static std::unique_ptr<SICacheControl> create(
const GCNSubtarget &ST);
295 SIAtomicAddrSpace AddrSpace)
const = 0;
302 SIAtomicAddrSpace AddrSpace)
const = 0;
309 SIAtomicAddrSpace AddrSpace)
const = 0;
315 SIAtomicAddrSpace AddrSpace,
316 SIMemOp
Op,
bool IsVolatile,
318 bool IsLastUse =
false)
const = 0;
333 SIAtomicAddrSpace AddrSpace,
335 bool IsCrossAddrSpaceOrdering,
336 Position Pos)
const = 0;
345 SIAtomicAddrSpace AddrSpace,
346 Position Pos)
const = 0;
356 SIAtomicAddrSpace AddrSpace,
357 bool IsCrossAddrSpaceOrdering,
358 Position Pos)
const = 0;
// Virtual destructor: concrete SIGfx*CacheControl instances are owned and
// destroyed through a SICacheControl base pointer (see create(), which
// returns std::unique_ptr<SICacheControl>), so deletion must dispatch
// virtually.
361 virtual ~SICacheControl() =
default;
363 virtual bool tryForceStoreSC0SC1(
const SIMemOpInfo &MOI,
369class SIGfx6CacheControl :
public SICacheControl {
// Construct the GFX6 cache-control policy. Holds no state of its own;
// simply forwards the subtarget to the SICacheControl base.
386 SIGfx6CacheControl(
const GCNSubtarget &ST) : SICacheControl(
ST) {}
390 SIAtomicAddrSpace AddrSpace)
const override;
394 SIAtomicAddrSpace AddrSpace)
const override;
398 SIAtomicAddrSpace AddrSpace)
const override;
401 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
402 bool IsVolatile,
bool IsNonTemporal,
403 bool IsLastUse)
const override;
407 SIAtomicAddrSpace AddrSpace,
409 bool IsCrossAddrSpaceOrdering,
410 Position Pos)
const override;
414 SIAtomicAddrSpace AddrSpace,
415 Position Pos)
const override;
419 SIAtomicAddrSpace AddrSpace,
420 bool IsCrossAddrSpaceOrdering,
421 Position Pos)
const override;
424class SIGfx7CacheControl :
public SIGfx6CacheControl {
// Construct the GFX7 cache-control policy; forwards the subtarget to the
// GFX6 base and inherits all behavior except the overrides declared below.
427 SIGfx7CacheControl(
const GCNSubtarget &ST) : SIGfx6CacheControl(
ST) {}
431 SIAtomicAddrSpace AddrSpace,
432 Position Pos)
const override;
436class SIGfx90ACacheControl :
public SIGfx7CacheControl {
// Construct the GFX90A cache-control policy; forwards the subtarget to the
// GFX7 base. GFX90A-specific behavior lives in the overrides declared below.
439 SIGfx90ACacheControl(
const GCNSubtarget &ST) : SIGfx7CacheControl(
ST) {}
443 SIAtomicAddrSpace AddrSpace)
const override;
447 SIAtomicAddrSpace AddrSpace)
const override;
451 SIAtomicAddrSpace AddrSpace)
const override;
454 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
455 bool IsVolatile,
bool IsNonTemporal,
456 bool IsLastUse)
const override;
460 SIAtomicAddrSpace AddrSpace,
462 bool IsCrossAddrSpaceOrdering,
463 Position Pos)
const override;
467 SIAtomicAddrSpace AddrSpace,
468 Position Pos)
const override;
472 SIAtomicAddrSpace AddrSpace,
473 bool IsCrossAddrSpaceOrdering,
474 Position Pos)
const override;
477class SIGfx940CacheControl :
public SIGfx90ACacheControl {
// Construct the GFX940 cache-control policy; forwards the subtarget to the
// GFX90A base. NOTE(review): the trailing ';' after the empty body is
// redundant (harmless, but -Wextra-semi would flag it).
500 SIGfx940CacheControl(
const GCNSubtarget &ST) : SIGfx90ACacheControl(
ST) {};
504 SIAtomicAddrSpace AddrSpace)
const override;
508 SIAtomicAddrSpace AddrSpace)
const override;
512 SIAtomicAddrSpace AddrSpace)
const override;
515 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
516 bool IsVolatile,
bool IsNonTemporal,
517 bool IsLastUse)
const override;
520 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
523 SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
524 Position Pos)
const override;
526 bool tryForceStoreSC0SC1(
const SIMemOpInfo &MOI,
528 bool Changed =
false;
529 if (
ST.hasForceStoreSC0SC1() &&
530 (MOI.getInstrAddrSpace() & (SIAtomicAddrSpace::SCRATCH |
531 SIAtomicAddrSpace::GLOBAL |
532 SIAtomicAddrSpace::OTHER)) !=
533 SIAtomicAddrSpace::NONE) {
534 Changed |= enableSC0Bit(
MI);
535 Changed |= enableSC1Bit(
MI);
541class SIGfx10CacheControl :
public SIGfx7CacheControl {
// Construct the GFX10 cache-control policy; forwards the subtarget to the
// GFX7 base (GFX10 derives from GFX7, not GFX90A/GFX940 — the CDNA-specific
// SC0/SC1 handling does not apply here).
552 SIGfx10CacheControl(
const GCNSubtarget &ST) : SIGfx7CacheControl(
ST) {}
556 SIAtomicAddrSpace AddrSpace)
const override;
559 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
560 bool IsVolatile,
bool IsNonTemporal,
561 bool IsLastUse)
const override;
565 SIAtomicAddrSpace AddrSpace,
567 bool IsCrossAddrSpaceOrdering,
568 Position Pos)
const override;
572 SIAtomicAddrSpace AddrSpace,
573 Position Pos)
const override;
576class SIGfx11CacheControl :
public SIGfx10CacheControl {
// Construct the GFX11 cache-control policy; forwards the subtarget to the
// GFX10 base and inherits its waitcnt/invalidate behavior except for the
// overrides declared below.
578 SIGfx11CacheControl(
const GCNSubtarget &ST) : SIGfx10CacheControl(
ST) {}
582 SIAtomicAddrSpace AddrSpace)
const override;
585 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
586 bool IsVolatile,
bool IsNonTemporal,
587 bool IsLastUse)
const override;
590class SIGfx12CacheControl :
public SIGfx11CacheControl {
// Construct the GFX12 cache-control policy; forwards the subtarget to the
// GFX11 base. GFX12-specific waits/scope handling live in the overrides
// declared below.
611 SIGfx12CacheControl(
const GCNSubtarget &ST) : SIGfx11CacheControl(
ST) {}
614 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
615 bool IsCrossAddrSpaceOrdering, Position Pos)
const override;
618 SIAtomicAddrSpace AddrSpace, Position Pos)
const override;
621 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
622 bool IsVolatile,
bool IsNonTemporal,
623 bool IsLastUse)
const override;
632 std::unique_ptr<SICacheControl>
CC =
nullptr;
635 std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
645 bool removeAtomicPseudoMIs();
649 bool expandLoad(
const SIMemOpInfo &MOI,
653 bool expandStore(
const SIMemOpInfo &MOI,
657 bool expandAtomicFence(
const SIMemOpInfo &MOI,
661 bool expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
684 const char *Msg)
const {
685 const Function &
Func =
MI->getParent()->getParent()->getFunction();
687 Func.getContext().diagnose(Diag);
690std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
692 SIAtomicAddrSpace InstrAddrSpace)
const {
694 return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC,
true);
695 if (SSID == MMI->getAgentSSID())
696 return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC,
true);
697 if (SSID == MMI->getWorkgroupSSID())
698 return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
700 if (SSID == MMI->getWavefrontSSID())
701 return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
704 return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
706 if (SSID == MMI->getSystemOneAddressSpaceSSID())
707 return std::tuple(SIAtomicScope::SYSTEM,
708 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
709 if (SSID == MMI->getAgentOneAddressSpaceSSID())
710 return std::tuple(SIAtomicScope::AGENT,
711 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
712 if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
713 return std::tuple(SIAtomicScope::WORKGROUP,
714 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
715 if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
716 return std::tuple(SIAtomicScope::WAVEFRONT,
717 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
718 if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
719 return std::tuple(SIAtomicScope::SINGLETHREAD,
720 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace,
false);
724SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(
unsigned AS)
const {
726 return SIAtomicAddrSpace::FLAT;
728 return SIAtomicAddrSpace::GLOBAL;
730 return SIAtomicAddrSpace::LDS;
732 return SIAtomicAddrSpace::SCRATCH;
734 return SIAtomicAddrSpace::GDS;
736 return SIAtomicAddrSpace::OTHER;
743std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
745 assert(
MI->getNumMemOperands() > 0);
750 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
751 bool IsNonTemporal =
true;
753 bool IsLastUse =
false;
757 for (
const auto &MMO :
MI->memoperands()) {
758 IsNonTemporal &= MMO->isNonTemporal();
760 IsLastUse |= MMO->getFlags() &
MOLastUse;
762 toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
764 if (OpOrdering != AtomicOrdering::NotAtomic) {
765 const auto &IsSyncScopeInclusion =
766 MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
767 if (!IsSyncScopeInclusion) {
768 reportUnsupported(
MI,
769 "Unsupported non-inclusive atomic synchronization scope");
773 SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
775 assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
776 MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
782 SIAtomicScope
Scope = SIAtomicScope::NONE;
783 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
784 bool IsCrossAddressSpaceOrdering =
false;
785 if (Ordering != AtomicOrdering::NotAtomic) {
786 auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
788 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
791 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
793 if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
794 ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
795 ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
796 reportUnsupported(
MI,
"Unsupported atomic address space");
800 return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
801 IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
802 IsNonTemporal, IsLastUse);
805std::optional<SIMemOpInfo>
809 if (!(
MI->mayLoad() && !
MI->mayStore()))
813 if (
MI->getNumMemOperands() == 0)
814 return SIMemOpInfo();
816 return constructFromMIWithMMO(
MI);
819std::optional<SIMemOpInfo>
823 if (!(!
MI->mayLoad() &&
MI->mayStore()))
827 if (
MI->getNumMemOperands() == 0)
828 return SIMemOpInfo();
830 return constructFromMIWithMMO(
MI);
833std::optional<SIMemOpInfo>
837 if (
MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
844 auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
846 reportUnsupported(
MI,
"Unsupported atomic synchronization scope");
850 SIAtomicScope
Scope = SIAtomicScope::NONE;
851 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
852 bool IsCrossAddressSpaceOrdering =
false;
853 std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
856 if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
857 ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
858 reportUnsupported(
MI,
"Unsupported atomic address space");
862 return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
863 IsCrossAddressSpaceOrdering, AtomicOrdering::NotAtomic);
866std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
870 if (!(
MI->mayLoad() &&
MI->mayStore()))
874 if (
MI->getNumMemOperands() == 0)
875 return SIMemOpInfo();
877 return constructFromMIWithMMO(
MI);
881 TII =
ST.getInstrInfo();
897std::unique_ptr<SICacheControl> SICacheControl::create(
const GCNSubtarget &ST) {
899 if (
ST.hasGFX940Insts())
900 return std::make_unique<SIGfx940CacheControl>(ST);
901 if (
ST.hasGFX90AInsts())
902 return std::make_unique<SIGfx90ACacheControl>(ST);
904 return std::make_unique<SIGfx6CacheControl>(ST);
906 return std::make_unique<SIGfx7CacheControl>(ST);
908 return std::make_unique<SIGfx10CacheControl>(ST);
910 return std::make_unique<SIGfx11CacheControl>(ST);
911 return std::make_unique<SIGfx12CacheControl>(ST);
914bool SIGfx6CacheControl::enableLoadCacheBypass(
917 SIAtomicAddrSpace AddrSpace)
const {
919 bool Changed =
false;
921 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
923 case SIAtomicScope::SYSTEM:
924 case SIAtomicScope::AGENT:
927 Changed |= enableGLCBit(
MI);
929 case SIAtomicScope::WORKGROUP:
930 case SIAtomicScope::WAVEFRONT:
931 case SIAtomicScope::SINGLETHREAD:
949bool SIGfx6CacheControl::enableStoreCacheBypass(
952 SIAtomicAddrSpace AddrSpace)
const {
954 bool Changed =
false;
962bool SIGfx6CacheControl::enableRMWCacheBypass(
965 SIAtomicAddrSpace AddrSpace)
const {
967 bool Changed =
false;
977bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
979 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
989 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
991 bool Changed =
false;
997 if (
Op == SIMemOp::LOAD)
998 Changed |= enableGLCBit(
MI);
1005 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1011 if (IsNonTemporal) {
1014 Changed |= enableGLCBit(
MI);
1015 Changed |= enableSLCBit(
MI);
1023 SIAtomicScope Scope,
1024 SIAtomicAddrSpace AddrSpace,
1026 bool IsCrossAddrSpaceOrdering,
1027 Position Pos)
const {
1028 bool Changed =
false;
1033 if (Pos == Position::AFTER)
1037 bool LGKMCnt =
false;
1039 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1040 SIAtomicAddrSpace::NONE) {
1042 case SIAtomicScope::SYSTEM:
1043 case SIAtomicScope::AGENT:
1046 case SIAtomicScope::WORKGROUP:
1047 case SIAtomicScope::WAVEFRONT:
1048 case SIAtomicScope::SINGLETHREAD:
1057 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1059 case SIAtomicScope::SYSTEM:
1060 case SIAtomicScope::AGENT:
1061 case SIAtomicScope::WORKGROUP:
1068 LGKMCnt |= IsCrossAddrSpaceOrdering;
1070 case SIAtomicScope::WAVEFRONT:
1071 case SIAtomicScope::SINGLETHREAD:
1080 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1082 case SIAtomicScope::SYSTEM:
1083 case SIAtomicScope::AGENT:
1090 LGKMCnt |= IsCrossAddrSpaceOrdering;
1092 case SIAtomicScope::WORKGROUP:
1093 case SIAtomicScope::WAVEFRONT:
1094 case SIAtomicScope::SINGLETHREAD:
1103 if (VMCnt || LGKMCnt) {
1104 unsigned WaitCntImmediate =
1110 .
addImm(WaitCntImmediate);
1114 if (Pos == Position::AFTER)
1121 SIAtomicScope Scope,
1122 SIAtomicAddrSpace AddrSpace,
1123 Position Pos)
const {
1124 if (!InsertCacheInv)
1127 bool Changed =
false;
1132 if (Pos == Position::AFTER)
1135 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1137 case SIAtomicScope::SYSTEM:
1138 case SIAtomicScope::AGENT:
1142 case SIAtomicScope::WORKGROUP:
1143 case SIAtomicScope::WAVEFRONT:
1144 case SIAtomicScope::SINGLETHREAD:
1159 if (Pos == Position::AFTER)
1166 SIAtomicScope Scope,
1167 SIAtomicAddrSpace AddrSpace,
1168 bool IsCrossAddrSpaceOrdering,
1169 Position Pos)
const {
1170 return insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
1171 IsCrossAddrSpaceOrdering, Pos);
1175 SIAtomicScope Scope,
1176 SIAtomicAddrSpace AddrSpace,
1177 Position Pos)
const {
1178 if (!InsertCacheInv)
1181 bool Changed =
false;
1189 ? AMDGPU::BUFFER_WBINVL1
1190 : AMDGPU::BUFFER_WBINVL1_VOL;
1192 if (Pos == Position::AFTER)
1195 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1197 case SIAtomicScope::SYSTEM:
1198 case SIAtomicScope::AGENT:
1202 case SIAtomicScope::WORKGROUP:
1203 case SIAtomicScope::WAVEFRONT:
1204 case SIAtomicScope::SINGLETHREAD:
1219 if (Pos == Position::AFTER)
1225bool SIGfx90ACacheControl::enableLoadCacheBypass(
1227 SIAtomicScope Scope,
1228 SIAtomicAddrSpace AddrSpace)
const {
1230 bool Changed =
false;
1232 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1234 case SIAtomicScope::SYSTEM:
1235 case SIAtomicScope::AGENT:
1238 Changed |= enableGLCBit(
MI);
1240 case SIAtomicScope::WORKGROUP:
1245 if (
ST.isTgSplitEnabled())
1246 Changed |= enableGLCBit(
MI);
1248 case SIAtomicScope::WAVEFRONT:
1249 case SIAtomicScope::SINGLETHREAD:
1267bool SIGfx90ACacheControl::enableStoreCacheBypass(
1269 SIAtomicScope Scope,
1270 SIAtomicAddrSpace AddrSpace)
const {
1272 bool Changed =
false;
1274 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1276 case SIAtomicScope::SYSTEM:
1277 case SIAtomicScope::AGENT:
1281 case SIAtomicScope::WORKGROUP:
1282 case SIAtomicScope::WAVEFRONT:
1283 case SIAtomicScope::SINGLETHREAD:
1302bool SIGfx90ACacheControl::enableRMWCacheBypass(
1304 SIAtomicScope Scope,
1305 SIAtomicAddrSpace AddrSpace)
const {
1307 bool Changed =
false;
1309 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1311 case SIAtomicScope::SYSTEM:
1312 case SIAtomicScope::AGENT:
1317 case SIAtomicScope::WORKGROUP:
1318 case SIAtomicScope::WAVEFRONT:
1319 case SIAtomicScope::SINGLETHREAD:
1330bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
1332 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1342 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1344 bool Changed =
false;
1350 if (
Op == SIMemOp::LOAD)
1351 Changed |= enableGLCBit(
MI);
1358 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1364 if (IsNonTemporal) {
1367 Changed |= enableGLCBit(
MI);
1368 Changed |= enableSLCBit(
MI);
1376 SIAtomicScope Scope,
1377 SIAtomicAddrSpace AddrSpace,
1379 bool IsCrossAddrSpaceOrdering,
1380 Position Pos)
const {
1381 if (
ST.isTgSplitEnabled()) {
1389 if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
1390 SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
1391 (Scope == SIAtomicScope::WORKGROUP)) {
1393 Scope = SIAtomicScope::AGENT;
1397 AddrSpace &= ~SIAtomicAddrSpace
::LDS;
1399 return SIGfx7CacheControl::insertWait(
MI, Scope, AddrSpace,
Op,
1400 IsCrossAddrSpaceOrdering, Pos);
1404 SIAtomicScope Scope,
1405 SIAtomicAddrSpace AddrSpace,
1406 Position Pos)
const {
1407 if (!InsertCacheInv)
1410 bool Changed =
false;
1415 if (Pos == Position::AFTER)
1418 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1420 case SIAtomicScope::SYSTEM:
1432 case SIAtomicScope::AGENT:
1435 case SIAtomicScope::WORKGROUP:
1440 if (
ST.isTgSplitEnabled()) {
1442 Scope = SIAtomicScope::AGENT;
1445 case SIAtomicScope::WAVEFRONT:
1446 case SIAtomicScope::SINGLETHREAD:
1461 if (Pos == Position::AFTER)
1464 Changed |= SIGfx7CacheControl::insertAcquire(
MI, Scope, AddrSpace, Pos);
1470 SIAtomicScope Scope,
1471 SIAtomicAddrSpace AddrSpace,
1472 bool IsCrossAddrSpaceOrdering,
1473 Position Pos)
const {
1474 bool Changed =
false;
1479 if (Pos == Position::AFTER)
1482 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1484 case SIAtomicScope::SYSTEM:
1498 case SIAtomicScope::AGENT:
1499 case SIAtomicScope::WORKGROUP:
1500 case SIAtomicScope::WAVEFRONT:
1501 case SIAtomicScope::SINGLETHREAD:
1509 if (Pos == Position::AFTER)
1513 SIGfx7CacheControl::insertRelease(
MI, Scope, AddrSpace,
1514 IsCrossAddrSpaceOrdering, Pos);
1519bool SIGfx940CacheControl::enableLoadCacheBypass(
1521 SIAtomicAddrSpace AddrSpace)
const {
1523 bool Changed =
false;
1525 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1527 case SIAtomicScope::SYSTEM:
1529 Changed |= enableSC0Bit(
MI);
1530 Changed |= enableSC1Bit(
MI);
1532 case SIAtomicScope::AGENT:
1534 Changed |= enableSC1Bit(
MI);
1536 case SIAtomicScope::WORKGROUP:
1542 Changed |= enableSC0Bit(
MI);
1544 case SIAtomicScope::WAVEFRONT:
1545 case SIAtomicScope::SINGLETHREAD:
1563bool SIGfx940CacheControl::enableStoreCacheBypass(
1565 SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace)
const {
1567 bool Changed =
false;
1569 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1571 case SIAtomicScope::SYSTEM:
1573 Changed |= enableSC0Bit(
MI);
1574 Changed |= enableSC1Bit(
MI);
1576 case SIAtomicScope::AGENT:
1578 Changed |= enableSC1Bit(
MI);
1580 case SIAtomicScope::WORKGROUP:
1582 Changed |= enableSC0Bit(
MI);
1584 case SIAtomicScope::WAVEFRONT:
1585 case SIAtomicScope::SINGLETHREAD:
1603bool SIGfx940CacheControl::enableRMWCacheBypass(
1605 SIAtomicAddrSpace AddrSpace)
const {
1607 bool Changed =
false;
1609 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1611 case SIAtomicScope::SYSTEM:
1613 Changed |= enableSC1Bit(
MI);
1615 case SIAtomicScope::AGENT:
1616 case SIAtomicScope::WORKGROUP:
1617 case SIAtomicScope::WAVEFRONT:
1618 case SIAtomicScope::SINGLETHREAD:
1632bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
1634 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1644 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1646 bool Changed =
false;
1650 Changed |= enableSC0Bit(
MI);
1651 Changed |= enableSC1Bit(
MI);
1658 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1664 if (IsNonTemporal) {
1665 Changed |= enableNTBit(
MI);
1673 SIAtomicScope Scope,
1674 SIAtomicAddrSpace AddrSpace,
1675 Position Pos)
const {
1676 if (!InsertCacheInv)
1679 bool Changed =
false;
1684 if (Pos == Position::AFTER)
1687 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1689 case SIAtomicScope::SYSTEM:
1703 case SIAtomicScope::AGENT:
1716 case SIAtomicScope::WORKGROUP:
1721 if (
ST.isTgSplitEnabled()) {
1735 case SIAtomicScope::WAVEFRONT:
1736 case SIAtomicScope::SINGLETHREAD:
1752 if (Pos == Position::AFTER)
1759 SIAtomicScope Scope,
1760 SIAtomicAddrSpace AddrSpace,
1761 bool IsCrossAddrSpaceOrdering,
1762 Position Pos)
const {
1763 bool Changed =
false;
1768 if (Pos == Position::AFTER)
1771 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1773 case SIAtomicScope::SYSTEM:
1788 case SIAtomicScope::AGENT:
1798 case SIAtomicScope::WORKGROUP:
1799 case SIAtomicScope::WAVEFRONT:
1800 case SIAtomicScope::SINGLETHREAD:
1810 if (Pos == Position::AFTER)
1815 Changed |= insertWait(
MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
1816 IsCrossAddrSpaceOrdering, Pos);
1821bool SIGfx10CacheControl::enableLoadCacheBypass(
1823 SIAtomicScope Scope,
1824 SIAtomicAddrSpace AddrSpace)
const {
1826 bool Changed =
false;
1828 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1830 case SIAtomicScope::SYSTEM:
1831 case SIAtomicScope::AGENT:
1834 Changed |= enableGLCBit(
MI);
1835 Changed |= enableDLCBit(
MI);
1837 case SIAtomicScope::WORKGROUP:
1842 if (!
ST.isCuModeEnabled())
1843 Changed |= enableGLCBit(
MI);
1845 case SIAtomicScope::WAVEFRONT:
1846 case SIAtomicScope::SINGLETHREAD:
1864bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
1866 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
1877 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
1879 bool Changed =
false;
1885 if (
Op == SIMemOp::LOAD) {
1886 Changed |= enableGLCBit(
MI);
1887 Changed |= enableDLCBit(
MI);
1895 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
1900 if (IsNonTemporal) {
1905 if (
Op == SIMemOp::STORE)
1906 Changed |= enableGLCBit(
MI);
1907 Changed |= enableSLCBit(
MI);
1916 SIAtomicScope Scope,
1917 SIAtomicAddrSpace AddrSpace,
1919 bool IsCrossAddrSpaceOrdering,
1920 Position Pos)
const {
1921 bool Changed =
false;
1926 if (Pos == Position::AFTER)
1931 bool LGKMCnt =
false;
1933 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1934 SIAtomicAddrSpace::NONE) {
1936 case SIAtomicScope::SYSTEM:
1937 case SIAtomicScope::AGENT:
1938 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1940 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1943 case SIAtomicScope::WORKGROUP:
1949 if (!
ST.isCuModeEnabled()) {
1950 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
1952 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
1956 case SIAtomicScope::WAVEFRONT:
1957 case SIAtomicScope::SINGLETHREAD:
1966 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
1968 case SIAtomicScope::SYSTEM:
1969 case SIAtomicScope::AGENT:
1970 case SIAtomicScope::WORKGROUP:
1977 LGKMCnt |= IsCrossAddrSpaceOrdering;
1979 case SIAtomicScope::WAVEFRONT:
1980 case SIAtomicScope::SINGLETHREAD:
1989 if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
1991 case SIAtomicScope::SYSTEM:
1992 case SIAtomicScope::AGENT:
1999 LGKMCnt |= IsCrossAddrSpaceOrdering;
2001 case SIAtomicScope::WORKGROUP:
2002 case SIAtomicScope::WAVEFRONT:
2003 case SIAtomicScope::SINGLETHREAD:
2012 if (VMCnt || LGKMCnt) {
2013 unsigned WaitCntImmediate =
2019 .
addImm(WaitCntImmediate);
2030 if (Pos == Position::AFTER)
2037 SIAtomicScope Scope,
2038 SIAtomicAddrSpace AddrSpace,
2039 Position Pos)
const {
2040 if (!InsertCacheInv)
2043 bool Changed =
false;
2048 if (Pos == Position::AFTER)
2051 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
2053 case SIAtomicScope::SYSTEM:
2054 case SIAtomicScope::AGENT:
2062 case SIAtomicScope::WORKGROUP:
2067 if (!
ST.isCuModeEnabled()) {
2072 case SIAtomicScope::WAVEFRONT:
2073 case SIAtomicScope::SINGLETHREAD:
2088 if (Pos == Position::AFTER)
2094bool SIGfx11CacheControl::enableLoadCacheBypass(
2096 SIAtomicAddrSpace AddrSpace)
const {
2098 bool Changed =
false;
2100 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
2102 case SIAtomicScope::SYSTEM:
2103 case SIAtomicScope::AGENT:
2106 Changed |= enableGLCBit(
MI);
2108 case SIAtomicScope::WORKGROUP:
2113 if (!
ST.isCuModeEnabled())
2114 Changed |= enableGLCBit(
MI);
2116 case SIAtomicScope::WAVEFRONT:
2117 case SIAtomicScope::SINGLETHREAD:
2135bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
2137 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
2148 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
2150 bool Changed =
false;
2156 if (
Op == SIMemOp::LOAD)
2157 Changed |= enableGLCBit(
MI);
2160 Changed |= enableDLCBit(
MI);
2167 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
2172 if (IsNonTemporal) {
2177 if (
Op == SIMemOp::STORE)
2178 Changed |= enableGLCBit(
MI);
2179 Changed |= enableSLCBit(
MI);
2182 Changed |= enableDLCBit(
MI);
2219bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
2236 SIAtomicScope Scope,
2237 SIAtomicAddrSpace AddrSpace, SIMemOp
Op,
2238 bool IsCrossAddrSpaceOrdering,
2239 Position Pos)
const {
2240 bool Changed =
false;
2245 bool LOADCnt =
false;
2247 bool STORECnt =
false;
2249 if (Pos == Position::AFTER)
2252 if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
2253 SIAtomicAddrSpace::NONE) {
2255 case SIAtomicScope::SYSTEM:
2256 case SIAtomicScope::AGENT:
2257 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
2259 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
2262 case SIAtomicScope::WORKGROUP:
2268 if (!
ST.isCuModeEnabled()) {
2269 if ((
Op & SIMemOp::LOAD) != SIMemOp::NONE)
2271 if ((
Op & SIMemOp::STORE) != SIMemOp::NONE)
2275 case SIAtomicScope::WAVEFRONT:
2276 case SIAtomicScope::SINGLETHREAD:
2285 if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
2287 case SIAtomicScope::SYSTEM:
2288 case SIAtomicScope::AGENT:
2289 case SIAtomicScope::WORKGROUP:
2296 DSCnt |= IsCrossAddrSpaceOrdering;
2298 case SIAtomicScope::WAVEFRONT:
2299 case SIAtomicScope::SINGLETHREAD:
2325 if (Pos == Position::AFTER)
2332 SIAtomicScope Scope,
2333 SIAtomicAddrSpace AddrSpace,
2334 Position Pos)
const {
2335 if (!InsertCacheInv)
2347 if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
2352 case SIAtomicScope::SYSTEM:
2355 case SIAtomicScope::AGENT:
2358 case SIAtomicScope::WORKGROUP:
2363 if (
ST.isCuModeEnabled())
2368 case SIAtomicScope::WAVEFRONT:
2369 case SIAtomicScope::SINGLETHREAD:
2376 if (Pos == Position::AFTER)
2381 if (Pos == Position::AFTER)
2387bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
2389 bool IsVolatile,
bool IsNonTemporal,
bool IsLastUse =
false)
const {
2398 assert(
Op == SIMemOp::LOAD ||
Op == SIMemOp::STORE);
2400 bool Changed =
false;
2405 }
else if (IsNonTemporal) {
2413 if (
Op == SIMemOp::STORE)
2414 Changed |= insertWaitsBeforeSystemScopeStore(
MI);
2421 Changed |= insertWait(
MI, SIAtomicScope::SYSTEM, AddrSpace,
Op,
false,
2428bool SIGfx12CacheControl::expandSystemScopeStore(
2432 return insertWaitsBeforeSystemScopeStore(
MI);
2437bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
2438 if (AtomicPseudoMIs.empty())
2441 for (
auto &
MI : AtomicPseudoMIs)
2442 MI->eraseFromParent();
2444 AtomicPseudoMIs.clear();
2448bool SIMemoryLegalizer::expandLoad(
const SIMemOpInfo &MOI,
2452 bool Changed =
false;
2454 if (MOI.isAtomic()) {
2455 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2456 MOI.getOrdering() == AtomicOrdering::Acquire ||
2457 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2458 Changed |=
CC->enableLoadCacheBypass(
MI, MOI.getScope(),
2459 MOI.getOrderingAddrSpace());
2462 if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2463 Changed |=
CC->insertWait(
MI, MOI.getScope(),
2464 MOI.getOrderingAddrSpace(),
2465 SIMemOp::LOAD | SIMemOp::STORE,
2466 MOI.getIsCrossAddressSpaceOrdering(),
2469 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2470 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2471 Changed |=
CC->insertWait(
MI, MOI.getScope(),
2472 MOI.getInstrAddrSpace(),
2474 MOI.getIsCrossAddressSpaceOrdering(),
2476 Changed |=
CC->insertAcquire(
MI, MOI.getScope(),
2477 MOI.getOrderingAddrSpace(),
2487 Changed |=
CC->enableVolatileAndOrNonTemporal(
2488 MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
2489 MOI.isNonTemporal(), MOI.isLastUse());
2494bool SIMemoryLegalizer::expandStore(
const SIMemOpInfo &MOI,
2498 bool Changed =
false;
2500 if (MOI.isAtomic()) {
2501 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2502 MOI.getOrdering() == AtomicOrdering::Release ||
2503 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2504 Changed |=
CC->enableStoreCacheBypass(
MI, MOI.getScope(),
2505 MOI.getOrderingAddrSpace());
2508 if (MOI.getOrdering() == AtomicOrdering::Release ||
2509 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2510 Changed |=
CC->insertRelease(
MI, MOI.getScope(),
2511 MOI.getOrderingAddrSpace(),
2512 MOI.getIsCrossAddressSpaceOrdering(),
2521 Changed |=
CC->enableVolatileAndOrNonTemporal(
2522 MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
2523 MOI.isNonTemporal());
2527 Changed |=
CC->expandSystemScopeStore(
MI);
2531bool SIMemoryLegalizer::expandAtomicFence(
const SIMemOpInfo &MOI,
2533 assert(
MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
2535 AtomicPseudoMIs.push_back(
MI);
2536 bool Changed =
false;
2538 if (MOI.isAtomic()) {
2539 if (MOI.getOrdering() == AtomicOrdering::Acquire)
2540 Changed |=
CC->insertWait(
MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
2541 SIMemOp::LOAD | SIMemOp::STORE,
2542 MOI.getIsCrossAddressSpaceOrdering(),
2545 if (MOI.getOrdering() == AtomicOrdering::Release ||
2546 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2547 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2555 Changed |=
CC->insertRelease(
MI, MOI.getScope(),
2556 MOI.getOrderingAddrSpace(),
2557 MOI.getIsCrossAddressSpaceOrdering(),
2565 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2566 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2567 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2568 Changed |=
CC->insertAcquire(
MI, MOI.getScope(),
2569 MOI.getOrderingAddrSpace(),
2578bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
const SIMemOpInfo &MOI,
2582 bool Changed =
false;
2584 if (MOI.isAtomic()) {
2585 if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2586 MOI.getOrdering() == AtomicOrdering::Acquire ||
2587 MOI.getOrdering() == AtomicOrdering::Release ||
2588 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2589 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2590 Changed |=
CC->enableRMWCacheBypass(
MI, MOI.getScope(),
2591 MOI.getInstrAddrSpace());
2594 if (MOI.getOrdering() == AtomicOrdering::Release ||
2595 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2596 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
2597 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
2598 Changed |=
CC->insertRelease(
MI, MOI.getScope(),
2599 MOI.getOrderingAddrSpace(),
2600 MOI.getIsCrossAddressSpaceOrdering(),
2603 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
2604 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2605 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
2606 MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
2607 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
2608 Changed |=
CC->insertWait(
MI, MOI.getScope(),
2609 MOI.getInstrAddrSpace(),
2610 isAtomicRet(*
MI) ? SIMemOp::LOAD :
2612 MOI.getIsCrossAddressSpaceOrdering(),
2614 Changed |=
CC->insertAcquire(
MI, MOI.getScope(),
2615 MOI.getOrderingAddrSpace(),
2626 bool Changed =
false;
2628 SIMemOpAccess MOA(MF);
2631 for (
auto &
MBB : MF) {
2635 if (
MI->isBundle() &&
MI->mayLoadOrStore()) {
2638 I != E &&
I->isBundledWithPred(); ++
I) {
2639 I->unbundleFromPred();
2642 MO.setIsInternalRead(
false);
2645 MI->eraseFromParent();
2646 MI = II->getIterator();
2652 if (
const auto &MOI = MOA.getLoadInfo(
MI))
2653 Changed |= expandLoad(*MOI,
MI);
2654 else if (
const auto &MOI = MOA.getStoreInfo(
MI)) {
2655 Changed |= expandStore(*MOI,
MI);
2656 Changed |=
CC->tryForceStoreSC0SC1(*MOI,
MI);
2657 }
else if (
const auto &MOI = MOA.getAtomicFenceInfo(
MI))
2658 Changed |= expandAtomicFence(*MOI,
MI);
2659 else if (
const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(
MI))
2660 Changed |= expandAtomicCmpxchgOrRmw(*MOI,
MI);
2664 Changed |= removeAtomicPseudoMIs();
2670char SIMemoryLegalizer::
ID = 0;
2674 return new SIMemoryLegalizer();
static std::optional< LoadInfo > getLoadInfo(const MachineInstr &MI)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
AMDGPU Machine Module Info.
Atomic ordering constants.
#define LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE() pulls the operator overloads used by LLVM_MARK_AS_BITMASK_EN...
#define LLVM_MARK_AS_BITMASK_ENUM(LargestValue)
LLVM_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you can perform bitwise operatio...
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< bool > AmdgcnSkipCacheInvalidations("amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden, cl::desc("Use this to skip inserting cache invalidating instructions."))
static SPIRV::Scope::Scope getScope(SyncScope::ID Ord, SPIRVMachineModuleInfo *MMI)
static const uint32_t IV[8]
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
FunctionPass class - This class is used to implement most global optimizations.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineModuleInfo & getMMI() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Ty & getObjFileInfo()
Keep track of various per-module pieces of information for backends that would like to do so.
MachineOperand class - Representation of each machine instruction operand.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static bool isAtomicRet(const MachineInstr &MI)
StringRef - Represent a constant reference to a string, i.e.
LLVM Value Representation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
IsaVersion getIsaVersion(StringRef GPU)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
unsigned getVmcntBitMask(const IsaVersion &Version)
unsigned getLgkmcntBitMask(const IsaVersion &Version)
unsigned getExpcntBitMask(const IsaVersion &Version)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Undef
Value of the register doesn't matter.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
@ System
Synchronized with respect to all concurrently executing threads.
initializer< Ty > init(const Ty &Val)
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & SIMemoryLegalizerID
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
AtomicOrdering getMergedAtomicOrdering(AtomicOrdering AO, AtomicOrdering Other)
Return a single atomic ordering that is at least as strong as both the AO and Other orderings for an ...
AtomicOrdering
Atomic ordering for LLVM's memory model.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
FunctionPass * createSIMemoryLegalizerPass()
Instruction set architecture version.